You are viewing a plain text version of this content. The canonical link for it is here.
Posted to scm@geronimo.apache.org by rm...@apache.org on 2014/08/26 20:17:09 UTC

svn commit: r1620683 [5/17] - in /geronimo/specs/trunk: ./ geronimo-javamail_1.5_spec/ geronimo-javamail_1.5_spec/src/ geronimo-javamail_1.5_spec/src/main/ geronimo-javamail_1.5_spec/src/main/java/ geronimo-javamail_1.5_spec/src/main/java/javax/ geroni...

Added: geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/AddressParser.java
URL: http://svn.apache.org/viewvc/geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/AddressParser.java?rev=1620683&view=auto
==============================================================================
--- geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/AddressParser.java (added)
+++ geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/AddressParser.java Tue Aug 26 18:17:06 2014
@@ -0,0 +1,1991 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package javax.mail.internet;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.List;
+
+class AddressParser {
+
+    // validation strictness levels, ordered from most lenient to most conformant.
+    public static final int NONSTRICT = 0;
+    public static final int PARSE_HEADER = 1;
+    public static final int STRICT = 2;
+
+    // the mailbox forms an address can turn out to have.
+    protected static final int UNKNOWN = 0;
+    protected static final int ROUTE_ADDR = 1;
+    protected static final int GROUP_ADDR = 2;
+    protected static final int SIMPLE_ADDR = 3;
+
+    // token type codes.  Each code is the value of a representative character.
+    protected static final int END_OF_TOKENS = '\0';
+    protected static final int PERIOD = '.';
+    protected static final int LEFT_ANGLE = '<';
+    protected static final int RIGHT_ANGLE = '>';
+    protected static final int COMMA = ',';
+    protected static final int AT_SIGN = '@';
+    protected static final int SEMICOLON = ';';
+    protected static final int COLON = ':';
+    protected static final int QUOTED_LITERAL = '"';
+    protected static final int DOMAIN_LITERAL = '[';
+    protected static final int COMMENT = '(';
+    protected static final int ATOM = 'A';
+    protected static final int WHITESPACE = ' ';
+
+
+    // the address string being parsed.
+    private final String addresses;
+    // the index of the character currently being scanned.
+    private int position;
+    // the index just past the last character to scan.
+    private int end;
+    // the validation strictness level in effect.
+    private final int validationLevel;
+
+    public AddressParser(final String addresses, final int validation) {
+        this.validationLevel = validation;   // expected to be NONSTRICT, PARSE_HEADER or STRICT
+        this.addresses = addresses;          // not tokenized until one of the parse methods runs
+    }
+
+
+    /**
+     * Tokenize the address string and parse it as a delimited list of addresses.
+     * Empty list elements contribute nothing to the result.
+     *
+     * @return An array holding every address parsed out of the list.
+     * @exception AddressException
+     *                   Thrown for any validation errors.
+     */
+    public InternetAddress[] parseAddressList() throws AddressException
+    {
+        // break the address string up into tokens first.
+        final TokenStream stream = tokenizeAddress();
+
+        // accumulator for everything we parse out of the list.
+        final ArrayList parsed = new ArrayList();
+
+        // each pass handles one list element and then consumes the token that
+        // ended it.  That token should be either a "," delimiter, in which case
+        // another element follows, or the end-of-tokens marker, in which case
+        // the entire list has been processed.
+        AddressToken terminator;
+        do {
+            // an empty element comes back as an empty list, so addAll() is
+            // a no-op for it.
+            parsed.addAll(parseSingleAddress(stream, false));
+            terminator = stream.nextToken();
+        } while (terminator.type != END_OF_TOKENS);
+
+        final InternetAddress[] result = new InternetAddress[parsed.size()];
+        return (InternetAddress[]) parsed.toArray(result);
+    }
+
+
+    /**
+     * Parse the address string as exactly one internet address, not
+     * an address list.
+     *
+     * @return The single address found in the string.
+     * @exception AddressException
+     *                   If the string does not hold exactly one address.
+     */
+    public InternetAddress parseAddress() throws AddressException
+    {
+        // break the address string up into tokens first.
+        final TokenStream stream = tokenizeAddress();
+
+        // the parse hands back a list:  empty when no address was found, and
+        // possibly several entries when the string was a simple list of blank
+        // delimited tokens.  Only a result of exactly one entry is acceptable.
+        final List found = parseSingleAddress(stream, false);
+        final int count = found.size();
+        if (count == 0) {
+            throw new AddressException("Null address", addresses, 0);
+        }
+        if (count > 1) {
+            throw new AddressException("Illegal Address", addresses, 0);
+        }
+
+        // nothing but the end-of-tokens marker may follow the address.
+        final AddressToken trailing = stream.nextToken();
+        if (trailing.type != END_OF_TOKENS) {
+            illegalAddress("Illegal Address", trailing);
+        }
+        return (InternetAddress)found.get(0);
+    }
+
+
+    /**
+     * Validate an internet address.  The string must hold a single
+     * address, not a list of addresses, and the address must not
+     * contain any personal information to be valid.
+     *
+     * @exception AddressException
+     *                   If the string fails any of these checks.
+     */
+    public void validateAddress() throws AddressException
+    {
+        // break the address string up into tokens first.
+        final TokenStream stream = tokenizeAddress();
+
+        // the parse hands back a list:  empty when no address was found, and
+        // possibly several entries when the string was a simple list of blank
+        // delimited tokens.  Only a result of exactly one entry is acceptable.
+        final List found = parseSingleAddress(stream, false);
+        final int count = found.size();
+        if (count == 0) {
+            throw new AddressException("Null address", addresses, 0);
+        }
+        if (count > 1) {
+            throw new AddressException("Illegal Address", addresses, 0);
+        }
+
+        // the parse has already split the address into personal and address
+        // data, and a valid bare address carries no personal data.
+        final InternetAddress candidate = (InternetAddress)found.get(0);
+        if (candidate.personal != null) {
+            throw new AddressException("Illegal Address", addresses, 0);
+        }
+
+        // nothing but the end-of-tokens marker may follow the address.
+        final AddressToken trailing = stream.nextToken();
+        if (trailing.type != END_OF_TOKENS) {
+            illegalAddress("Illegal Address", trailing);
+        }
+    }
+
+
+    /**
+     * Extract the set of address from a group Internet specification.
+     *
+     * @return An array containing all of the non-null addresses in the list.
+     * @exception AddressException
+     */
+    public InternetAddress[] extractGroupList() throws AddressException
+    {
+        // get the address as a set of tokens we can process.
+        final TokenStream tokens = tokenizeAddress();
+
+        // get an array list accumulator.
+        final ArrayList addresses = new ArrayList();
+
+        AddressToken token = tokens.nextToken();
+
+        // scan forward to the ':' that starts the group list.  If we don't find one,
+        // this is an exception.
+        while (token.type != COLON) {
+            if (token.type == END_OF_TOKENS) {
+                illegalAddress("Missing ':'", token);
+            }
+            token = tokens.nextToken();
+        }
+
+        // we process sections of the token stream until we run out of tokens.
+        while (true) {
+            // parse off a single address.  Address lists can have null elements,
+            // so this might return a null value.  The null value does not get added
+            // to the address accumulator.
+            addresses.addAll(parseSingleAddress(tokens, true));
+            // This token should be either a "," delimiter or a group terminator.  If we're
+            // at the end, this is an error.
+            token = tokens.nextToken();
+            if (token.type == SEMICOLON) {
+                break;
+            }
+            else if (token.type == END_OF_TOKENS) {
+                illegalAddress("Missing ';'", token);
+            }
+        }
+
+        return (InternetAddress [])addresses.toArray(new InternetAddress[0]);
+    }
+
+
+    /**
+     * Parse out a single address from a stream of address tokens,
+     * returning a list of the InternetAddress objects it produced.
+     *
+     * @param tokens  The token source for this address.
+     * @param inGroup True if the address is a member of a group list, where
+     *                ';' terminates an address and ':' is not allowed.
+     * @return A list of parsed out and constructed InternetAddress objects.
+     *         The list is empty if this is an "empty" address in a list, and
+     *         can hold several items for a blank delimited simple address.
+     * @exception AddressException
+     */
+    private List parseSingleAddress(final TokenStream tokens, final boolean inGroup) throws AddressException
+    {
+        final List parsedAddresses = new ArrayList();
+
+        // index markers for personal information
+        AddressToken personalStart = null;
+        AddressToken personalEnd = null;
+
+        // and similar bits for the address information.
+        AddressToken addressStart = null;
+        AddressToken addressEnd = null;
+
+        // there is a fall-back set of rules allowed that will parse the address as a set of blank delimited
+        // tokens.  However, we do NOT allow this if we encounter any tokens that fall outside of these
+        // rules.  For example, comment fields and quoted strings will disallow the very lenient rule set.
+        boolean nonStrictRules = true;
+
+        // we don't know the type of address yet
+        int addressType = UNKNOWN;
+
+        // the parsing goes in two stages.  Stage one runs through the tokens locating the bounds
+        // of the address we're working on, resolving the personal information, and also validating
+        // some of the larger scale syntax features of an address (matched delimiters for routes and
+        // groups, invalid nesting checks, etc.).
+
+        // get the next token from the queue and save this.  We're going to scan ahead a bit to
+        // figure out what type of address we're looking at, then reset to do the actual parsing
+        // once we've figured out a form.
+        final AddressToken first = tokens.nextToken();
+        // push it back on before starting processing.
+        tokens.pushToken(first);
+
+        // scan ahead for a trigger token that tells us what we've got.
+        while (addressType == UNKNOWN) {
+
+            final AddressToken token = tokens.nextToken();
+            switch (token.type) {
+                // skip these for now...after we've processed everything and found that this is a simple
+                // address form, then we'll check for a leading comment token in the first position and use
+                // it as personal information.
+                case COMMENT:
+                    // comments do, however, denote that this must be parsed according to RFC822 rules.
+                    nonStrictRules = false;
+                    break;
+
+                // a semi-colon when processing a group is an address terminator.  we need to
+                // process this like a comma then
+                case SEMICOLON:
+                    if (inGroup) {
+                        // we need to push the terminator back on for the caller to see.
+                        tokens.pushToken(token);
+                        // if we've not tagged any tokens as being the address beginning, this must be a
+                        // null address.
+                        if (addressStart == null) {
+                            // just return the empty list from this.
+                            return parsedAddresses;
+                        }
+                        // the end token is the back part.
+                        addressEnd = tokens.previousToken(token);
+                        // without a '<' for a route addr, we can't distinguish address tokens from personal data.
+                        // We'll use a leading comment, if there is one.
+                        personalStart = null;
+                        // this is just a simple form.
+                        addressType = SIMPLE_ADDR;
+                        break;
+                    }
+
+                // NOTE:  The above falls through if this is not a group.
+
+                // any of these tokens are a real token that can be the start of an address.  Many of
+                // them are not valid as first tokens in this context, but we flag them later if validation
+                // has been requested.  For now, we just mark these as the potential address start.
+                case DOMAIN_LITERAL:
+                case QUOTED_LITERAL:
+                    // this set of tokens require fuller RFC822 parsing, so turn off the flag.
+                    nonStrictRules = false;
+
+                case ATOM:
+                case AT_SIGN:
+                case PERIOD:
+                    // if we've not determined the start of the address yet, then check to see if we
+                    // need to consider this the personal start.
+                    if (addressStart == null) {
+                        if (personalStart == null) {
+                            personalStart = token;
+                        }
+                        // This is the first real token of the address, which at this point can
+                        // be either the personal info or the first token of the address.  If we hit
+                        // an address terminator without encountering either a route trigger or group
+                        // trigger, then this is the real address.
+                        addressStart = token;
+                    }
+                    break;
+
+                // a LEFT_ANGLE indicates we have a full RFC822 mailbox form.  The leading phrase
+                // is the personal info.  The address is inside the brackets.
+                case LEFT_ANGLE:
+                    // a route address automatically switches off the blank-delimited token mode.
+                    nonStrictRules = false;
+                    // this is a route address
+                    addressType = ROUTE_ADDR;
+                    // the address is placed in the InternetAddress object without the route
+                    // brackets, so our start is one past this.
+                    addressStart = tokens.nextRealToken();
+                    // push this back on the queue so the scanner picks it up properly.
+                    tokens.pushToken(addressStart);
+                    // make sure we flag the end of the personal section too.
+                    if (personalStart != null) {
+                        personalEnd = tokens.previousToken(token);
+                    }
+                    // scan the rest of a route address.
+                    addressEnd = scanRouteAddress(tokens, false);
+                    break;
+
+                // a COLON indicates this is a group specifier...parse the group.
+                case COLON:
+                    // Colons would not be valid in simple lists, so turn it off.
+                    nonStrictRules = false;
+                    // if we're scanning a group, we shouldn't encounter a ":".  This is a
+                    // recursion error if found.
+                    if (inGroup) {
+                        illegalAddress("Nested group element", token);
+                    }
+                    addressType = GROUP_ADDR;
+                    // groups don't have any personal sections.
+                    personalStart = null;
+                    // our real start was back at the beginning
+                    addressStart = first;
+                    addressEnd = scanGroupAddress(tokens);
+                    break;
+
+                // (a semicolon that terminates a group member is handled by the SEMICOLON case above.)
+
+
+                // reached the end of string...this might be a null address, or one of the very simple name
+                // forms used for non-strict RFC822 versions.  Reset, and try that form
+                case END_OF_TOKENS:
+                    // if we're scanning a group, we shouldn't encounter an end token.  This is an
+                    // error if found.
+                    if (inGroup) {
+                        illegalAddress("Missing ';'", token);
+                    }
+
+                    // NOTE:  this falls through to the COMMA handling below.
+
+                // this is the terminator for an address in an address list or a group list.
+                case COMMA:
+                    // we need to push the terminator back on for the caller to see.
+                    tokens.pushToken(token);
+                    // if we've not tagged any tokens as being the address beginning, this must be a
+                    // null address.
+                    if (addressStart == null) {
+                        // just return the empty list from this.
+                        return parsedAddresses;
+                    }
+                    // the end token is the back part.
+                    addressEnd = tokens.previousToken(token);
+                    // without a '<' for a route addr, we can't distinguish address tokens from personal data.
+                    // We'll use a leading comment, if there is one.
+                    personalStart = null;
+                    // this is just a simple form.
+                    addressType = SIMPLE_ADDR;
+                    break;
+
+                // right angle tokens are pushed, because parsing of the bracketing is not necessarily simple.
+                // we need to flag these here.
+                case RIGHT_ANGLE:
+                    illegalAddress("Unexpected '>'", token);
+
+            }
+        }
+
+        String personal = null;
+
+        // if we have personal data, then convert it to a string value.
+        if (personalStart != null) {
+            final TokenStream personalTokens = tokens.section(personalStart, personalEnd);
+            personal = personalToString(personalTokens);
+        }
+        // if we have a simple address, then check the first token to see if it's a comment.  For simple addresses,
+        // we'll accept the first comment token as the personal information.
+        else {
+            if (addressType == SIMPLE_ADDR && first.type == COMMENT) {
+                personal = first.value;
+            }
+        }
+
+        final TokenStream addressTokens = tokens.section(addressStart, addressEnd);
+
+        // when we're just parsing a header (PARSE_HEADER), no validation is done at all.  For the
+        // other validation levels, the address tokens are validated using the rules for the
+        // type of address we found.
+        if (validationLevel != PARSE_HEADER) {
+            switch (addressType) {
+                case GROUP_ADDR:
+                    validateGroup(addressTokens);
+                    break;
+
+                case ROUTE_ADDR:
+                    validateRouteAddr(addressTokens, false);
+                    break;
+
+                case SIMPLE_ADDR:
+                    // this is a conditional validation
+                    validateSimpleAddress(addressTokens);
+                    break;
+            }
+        }
+
+        // more complex addresses and addresses containing tokens other than just simple addresses
+        // need proper handling.
+        if (validationLevel != NONSTRICT || addressType != SIMPLE_ADDR || !nonStrictRules) {
+            // we might have traversed this already when we validated, so reset the
+            // position before using this again.
+            addressTokens.reset();
+            final String address = addressToString(addressTokens);
+
+            // get the parsed out sections as string values.
+            final InternetAddress result = new InternetAddress();
+            result.setAddress(address);
+            try {
+                result.setPersonal(personal);
+            } catch (final UnsupportedEncodingException e) {  // NOTE(review): swallowed without any handling -- confirm this is intended
+            }
+            // even though we have a single address, we return this as a list.  Simple addresses
+            // can produce a list of several items, so we need to return everything.
+            parsedAddresses.add(result);
+            return parsedAddresses;
+        }
+        else {
+            addressTokens.reset();
+
+            TokenStream nextAddress = addressTokens.getBlankDelimitedToken();
+            while (nextAddress != null) {
+                final String address = addressToString(nextAddress);
+                // get the parsed out sections as string values.
+                final InternetAddress result = new InternetAddress();
+                result.setAddress(address);
+                parsedAddresses.add(result);
+                nextAddress = addressTokens.getBlankDelimitedToken();
+            }
+            return parsedAddresses;
+        }
+    }
+
+
+    /**
+     * Scan the token stream, parsing off a route addr spec.  This will do
+     * some basic syntax validation, but will not actually validate any of
+     * the address information.  Comments will be discarded.
+     *
+     * @param tokens  The stream of tokens.
+     * @param inGroup True if the route address is part of a group list, so
+     *                it may be followed by ',' or ';' instead of ',' or the end.
+     * @return The last token of the route address (the one preceding the
+     *         terminating '>').
+     */
+    private AddressToken scanRouteAddress(final TokenStream tokens, final boolean inGroup) throws AddressException {
+        // get the first token and ensure we have something between the "<" and ">".
+        AddressToken token = tokens.nextRealToken();
+        // the last processed non-whitespace token, which is the actual address end once the
+        // right angle bracket is encountered.
+
+        AddressToken previous = null;
+
+        // if this route-addr has route information, the first token after the '<' must be a '@'.
+        // this determines if/where a colon or comma can appear.
+        boolean inRoute = token.type == AT_SIGN;
+
+        // now scan until we reach the terminator.  The only validation is done on illegal characters.
+        while (true) {
+            switch (token.type) {
+                // The following tokens are all valid between the brackets, so just skip over them.
+                case ATOM:
+                case QUOTED_LITERAL:
+                case DOMAIN_LITERAL:
+                case PERIOD:
+                case AT_SIGN:
+                    break;
+
+                case COLON:
+                    // if not processing route information, this is illegal.
+                    if (!inRoute) {
+                        illegalAddress("Unexpected ':'", token);
+                    }
+                    // this is the end of the route information, the rules now change.
+                    inRoute = false;
+                    break;
+
+                case COMMA:
+                    // if not processing route information, this is illegal.
+                    if (!inRoute) {
+                        illegalAddress("Unexpected ','", token);
+                    }
+                    break;
+
+                case RIGHT_ANGLE:
+                    // if previous is null, we've had a route address which is "<>".  That's illegal.
+                    if (previous == null) {
+                        illegalAddress("Illegal address", token);
+                    }
+                    // step to the next token..this had better be either a comma for another address or
+                    // the very end of the address list .
+                    token = tokens.nextRealToken();
+                    // if we're scanning part of a group, then the allowed terminators are either ',' or ';'.
+                    if (inGroup) {
+                        if (token.type != COMMA && token.type != SEMICOLON) {
+                            illegalAddress("Illegal address", token);
+                        }
+                    }
+                    // a normal address should have either a ',' for a list or the end.
+                    else {
+                        if (token.type != COMMA && token.type != END_OF_TOKENS) {
+                            illegalAddress("Illegal address", token);
+                        }
+                    }
+                    // we need to push the termination token back on.
+                    tokens.pushToken(token);
+                    // return the previous token as the updated position.
+                    return previous;
+
+                case END_OF_TOKENS:
+                    illegalAddress("Missing '>'", token);
+
+                // now for the illegal ones in this context (NOTE(review): no breaks -- presumably illegalAddress() always throws; confirm).
+                case SEMICOLON:
+                    illegalAddress("Unexpected ';'", token);
+
+                case LEFT_ANGLE:
+                    illegalAddress("Unexpected '<'", token);
+            }
+            // remember the previous token.
+            previous = token;
+            token = tokens.nextRealToken();
+        }
+    }
+
+
+    /**
+     * Scan the token stream, parsing off a group address.  This will do
+     * some basic syntax validation, but will not actually validate any of
+     * the address information.  Comments will be ignored.
+     *
+     * @param tokens The stream of tokens, positioned just past the group's ':'.
+     *
+     * @return The last token of the group address (the terminating ';').
+     * @exception AddressException
+     */
+    private AddressToken scanGroupAddress(final TokenStream tokens) throws AddressException {
+        // A group does not require that there be anything between the ':' and ';'.  This is
+        // just a group with an empty list.
+        AddressToken token = tokens.nextRealToken();
+
+        // now scan until we reach the terminator.  The only validation is done on illegal characters.
+        while (true) {
+            switch (token.type) {
+                // The following tokens are all valid in group addresses, so just skip over them.
+                case ATOM:
+                case QUOTED_LITERAL:
+                case DOMAIN_LITERAL:
+                case PERIOD:
+                case AT_SIGN:
+                case COMMA:
+                    break;
+
+                case COLON:
+                    illegalAddress("Nested group", token);
+
+                // route address within a group specifier....we need to at least verify the bracket nesting
+                // and higher level syntax of the route.
+                case LEFT_ANGLE:
+                    scanRouteAddress(tokens, true);
+                    break;
+
+                // running out of tokens before reaching the ';' terminator is an error.
+                case END_OF_TOKENS:
+                    illegalAddress("Missing ';'", token);
+
+                // the ';' is the only allowed terminator for the group.
+                case SEMICOLON:
+                    // only a ',' or the end may follow the group; anything else is the offender we report.
+                    final AddressToken next = tokens.nextRealToken();
+                    if (next.type != COMMA && next.type != END_OF_TOKENS) {
+                        illegalAddress("Illegal address", next);
+                    }
+                    // don't forget to put this back on...our caller will need it.
+                    tokens.pushToken(next);
+                    return token;
+
+                case RIGHT_ANGLE:
+                    illegalAddress("Unexpected '>'", token);
+            }
+            token = tokens.nextRealToken();
+        }
+    }
+
+
+    /**
+     * Parse the provided internet address into a set of tokens.  This
+     * phase only does a syntax check on the tokens.  The interpretation
+     * of the tokens is the next phase.
+     *
+     * @exception AddressException
+     */
+    private TokenStream tokenizeAddress() throws AddressException {
+
+        // get a list for the set of tokens
+        final TokenStream tokens = new TokenStream();
+
+        end = addresses.length();    // our parsing end marker
+
+        // now scan along the string looking for the special characters in an internet address.
+        while (moreCharacters()) {
+            final char ch = currentChar();
+
+            switch (ch) {
+                // start of a comment bit...ignore everything until we hit a closing paren.
+                case '(':
+                    scanComment(tokens);
+                    break;
+                // a closing paren found outside of normal processing.
+                case ')':
+                    syntaxError("Unexpected ')'", position);
+
+
+                // start of a quoted string
+                case '"':
+                    scanQuotedLiteral(tokens);
+                    break;
+                // domain literal
+                case '[':
+                    scanDomainLiteral(tokens);
+                    break;
+
+                // a naked closing bracket...not valid except as part of a domain literal.
+                case ']':
+                    syntaxError("Unexpected ']'", position);
+
+                // special character delimiters
+                case '<':
+                    tokens.addToken(new AddressToken(LEFT_ANGLE, position));
+                    nextChar();
+                    break;
+
+                // a naked closing bracket...not valid without a starting one, but
+                // we need to handle this in context.
+                case '>':
+                    tokens.addToken(new AddressToken(RIGHT_ANGLE, position));
+                    nextChar();
+                    break;
+                case ':':
+                    tokens.addToken(new AddressToken(COLON, position));
+                    nextChar();
+                    break;
+                case ',':
+                    tokens.addToken(new AddressToken(COMMA, position));
+                    nextChar();
+                    break;
+                case '.':
+                    tokens.addToken(new AddressToken(PERIOD, position));
+                    nextChar();
+                    break;
+                case ';':
+                    tokens.addToken(new AddressToken(SEMICOLON, position));
+                    nextChar();
+                    break;
+                case '@':
+                    tokens.addToken(new AddressToken(AT_SIGN, position));
+                    nextChar();
+                    break;
+
+                // white space characters.  These are mostly token delimiters, but there are some relaxed
+                // situations where they get processed, so we need to add a white space token for the first
+                // one we encounter in a span.
+                case ' ':
+                case '\t':
+                case '\r':
+                case '\n':
+                    // add a single white space token
+                    tokens.addToken(new AddressToken(WHITESPACE, position));
+
+                    nextChar();
+                    // step over any further white space characters, leaving us positioned either
+                    // at the end of the data or at the first non-white space character.
+                    while (moreCharacters()) {
+                        final char nextChar = currentChar();
+                        if (nextChar == ' ' || nextChar == '\t' || nextChar == '\r' || nextChar == '\n') {
+                            nextChar();
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                    break;
+
+                // potentially an atom...if it starts with an allowed atom character, we
+                // parse out the token, otherwise this is invalid.
+                default:
+                    if (ch < 040 || ch >= 0177) {
+                        syntaxError("Illegal character in address", position);
+                    }
+
+                    scanAtom(tokens);
+                    break;
+            }
+        }
+
+        // for this end marker, give an end position.
+        tokens.addToken(new AddressToken(END_OF_TOKENS, addresses.length()));
+        return tokens;
+    }
+
+
+    /**
+     * Advance the scan cursor by exactly one character.  No bounds check is
+     * made here; the scanning loops test moreCharacters() before they read.
+     */
+    private void nextChar() {
+        position += 1;
+    }
+
+
+    /**
+     * Retrieve the character at the current parsing position.
+     *
+     * @return The current character.
+     */
+    private char currentChar() {
+        return addresses.charAt(position);
+    }
+
+    /**
+     * Test whether the scan cursor is still short of the end of the data.
+     *
+     * @return true while at least one unread character remains (the position
+     *         is before the end marker), false once the scan is exhausted.
+     */
+    private boolean moreCharacters() {
+        return end > position;
+    }
+
+
+    /**
+     * Scan an RFC822 quoted string and add it to the token stream as a
+     * QUOTED_LITERAL token.  On entry the cursor sits on the opening quote;
+     * on a normal return it sits just past the closing quote.  The token
+     * value has the surrounding quotes removed and each backslash escape
+     * reduced to the escaped character.
+     *
+     * NOTE(review): the token is stamped with the position of the closing
+     * quote, whereas the domain literal and comment scanners record where
+     * their token starts -- confirm this difference is intended.
+     *
+     * @param tokens The TokenStream where the parsed out token is added.
+     *
+     * @exception AddressException
+     *                   if the string contains a bare carriage return or the
+     *                   data ends before the closing quote is found.
+     */
+    private void scanQuotedLiteral(final TokenStream tokens) throws AddressException {
+        final StringBuilder text = new StringBuilder();
+
+        // the loop initializer steps over the opening quote; the update clause
+        // consumes whichever character was just handled.
+        for (nextChar(); moreCharacters(); nextChar()) {
+            final char ch = currentChar();
+            switch (ch) {
+                case '"':
+                    // closing delimiter...emit the token and leave the cursor after the quote.
+                    tokens.addToken(new AddressToken(text.toString(), QUOTED_LITERAL, position));
+                    nextChar();
+                    return;
+
+                case '\\':
+                    // an escape must be followed by the character it escapes.
+                    nextChar();
+                    if (!moreCharacters()) {
+                        syntaxError("Missing '\"'", position);
+                    }
+                    text.append(currentChar());
+                    break;
+
+                case '\r':
+                    // RFC822 does not allow a bare CR inside a quoted string.
+                    syntaxError("Illegal line end in literal", position);
+                    break;
+
+                default:
+                    text.append(ch);
+                    break;
+            }
+        }
+
+        // ran off the end of the data without seeing the closing quote.
+        syntaxError("Missing '\"'", position);
+    }
+
+
+    /**
+     * Scan an RFC822 domain literal ("[...]") and add it to the token stream
+     * as a DOMAIN_LITERAL token.  On entry the cursor sits on the opening
+     * '['; on a normal return it sits just past the closing ']'.  The token
+     * value excludes the brackets but keeps any backslash escapes exactly as
+     * written, and the token position is that of the opening bracket.
+     *
+     * @param tokens The TokenStream where the parsed out token is added.
+     *
+     * @exception AddressException
+     *                   if the literal contains a nested '[', a carriage
+     *                   return, or the data ends before the closing ']'.
+     */
+    private void scanDomainLiteral(final TokenStream tokens) throws AddressException {
+        final StringBuffer value = new StringBuffer();
+
+        final int startPosition = position;
+        // step over the opening bracket.
+        nextChar();
+
+        while (moreCharacters()) {
+            final char ch = currentChar();
+
+            // is this an escape char?
+            if (ch == '\\') {
+                // because domain literals don't get extra escaping, we render them
+                // with the escaped characters intact.  Therefore, append the '\' escape
+                // first, then append the escaped character without examination.
+                value.append(ch);
+                // step past this, and grab the following character
+                nextChar();
+                if (!moreCharacters()) {
+                    // the data ended on a dangling escape, so the literal was never closed.
+                    // (This previously reported a missing double quote, which is the wrong
+                    // delimiter for a domain literal.)
+                    syntaxError("Missing ']'", position);
+                }
+                value.append(currentChar());
+            }
+            // end of the literal?
+            else if (ch == ']') {
+                // return the constructed string.
+                tokens.addToken(new AddressToken(value.toString(), DOMAIN_LITERAL, startPosition));
+                // step over the close delimiter for the benefit of the next token.
+                nextChar();
+                return;
+            }
+            // the RFC822 spec says no nesting
+            else if (ch == '[') {
+                syntaxError("Unexpected '['", position);
+            }
+            // carriage returns are similarly illegal.
+            else if (ch == '\r') {
+                syntaxError("Illegal line end in domain literal", position);
+            }
+            else
+            {
+                value.append(ch);
+            }
+            nextChar();
+        }
+        // missing delimiter
+        syntaxError("Missing ']'", position);
+    }
+
+    /**
+     * Scan an atom and add it to the token stream as an ATOM token whose
+     * value is the scanned substring and whose position is the atom start.
+     * The character under the cursor on entry is accepted without being
+     * tested; the atom then extends over every following character for
+     * which isAtom() is true.
+     *
+     * @param tokens The TokenStream where the parsed out token is added.
+     */
+    private void scanAtom(final TokenStream tokens) throws AddressException {
+        final int atomStart = position;
+
+        // always consume the first character, then keep going while we are
+        // still looking at atom characters.
+        do {
+            nextChar();
+        } while (moreCharacters() && isAtom(currentChar()));
+
+        final String text = addresses.substring(atomStart, position);
+        tokens.addToken(new AddressToken(text, ATOM, atomStart));
+    }
+
+
+    /**
+     * Scan an RFC822 comment ("(...)") and add it to the token stream as a
+     * COMMENT token positioned at the opening parenthesis.  Comments may
+     * nest; nested delimiters are kept in the token value, while the
+     * outermost pair is dropped.  A backslash escape contributes only the
+     * escaped character to the value.
+     *
+     * @param tokens The TokenStream where the parsed out token is added.
+     *
+     * @exception AddressException
+     *                   if the comment contains a carriage return or the
+     *                   data ends before the outermost ')' is found.
+     */
+    private void scanComment(final TokenStream tokens) throws AddressException {
+        final StringBuilder text = new StringBuilder();
+        final int commentStart = position;
+
+        // the opening parenthesis (skipped by the loop initializer) puts us one level deep.
+        int depth = 1;
+
+        for (nextChar(); moreCharacters(); nextChar()) {
+            final char ch = currentChar();
+            switch (ch) {
+                case '\\':
+                    // an escape needs a following character to apply to.
+                    nextChar();
+                    if (!moreCharacters()) {
+                        syntaxError("Missing ')'", position);
+                    }
+                    text.append(currentChar());
+                    break;
+
+                case '(':
+                    // a nested comment...its delimiter is part of the value.
+                    depth++;
+                    text.append(ch);
+                    break;
+
+                case ')':
+                    if (--depth == 0) {
+                        // this closes the outermost comment.  The delimiter is left out of the
+                        // value, since this is frequently used as personal data on the
+                        // InternetAddress objects.
+                        nextChar();
+                        tokens.addToken(new AddressToken(text.toString(), COMMENT, commentStart));
+                        return;
+                    }
+                    // only a nested comment ended, keep its delimiter.
+                    text.append(ch);
+                    break;
+
+                case '\r':
+                    syntaxError("Illegal line end in comment", position);
+                    break;
+
+                default:
+                    text.append(ch);
+                    break;
+            }
+        }
+
+        // the data ran out while a comment was still open.
+        syntaxError("Missing ')'", position);
+    }
+
+
+    /**
+     * Validate the syntax of an RFC822 group internet address specification.
+     *
+     * @param tokens The stream of tokens for the address.
+     *
+     * @exception AddressException
+     */
+    private void validateGroup(final TokenStream tokens) throws AddressException {
+        // we know already this is an address in the form "phrase:group;".  Now we need to validate the
+        // elements.
+
+        int phraseCount = 0;
+
+        AddressToken token = tokens.nextRealToken();
+        // now scan to the semi color, ensuring we have only word or comment tokens.
+        while (token.type != COLON) {
+            // only these tokens are allowed here.
+            if (token.type != ATOM && token.type != QUOTED_LITERAL) {
+                invalidToken(token);
+            }
+            phraseCount++;
+            token = tokens.nextRealToken();
+        }
+
+
+        // RFC822 groups require a leading phrase in group specifiers.
+        if (phraseCount == 0) {
+            illegalAddress("Missing group identifier phrase", token);
+        }
+
+        // now we do the remainder of the parsing using the initial phrase list as the sink...the entire
+        // address will be converted to a string later.
+
+        // ok, we only know this has been valid up to the ":", now we have some real checks to perform.
+        while (true) {
+            // go scan off a mailbox.  if everything goes according to plan, we should be positioned at either
+            // a comma or a semicolon.
+            validateGroupMailbox(tokens);
+
+            token = tokens.nextRealToken();
+
+            // we're at the end of the group.  Make sure this is truely the end.
+            if (token.type == SEMICOLON) {
+                token = tokens.nextRealToken();
+                if (token.type != END_OF_TOKENS) {
+                    illegalAddress("Illegal group address", token);
+                }
+                return;
+            }
+
+            // if not a semicolon, this better be a comma.
+            else if (token.type != COMMA) {
+                illegalAddress("Illegal group address", token);
+            }
+        }
+    }
+
+
+    /**
+     * Validate the syntax of a single mailbox within a group address.  The
+     * method looks ahead to decide which form the mailbox takes, rewinds to
+     * the first token, and runs the matching validation.  An empty list
+     * element (an immediate comma or semicolon) is accepted as is.
+     *
+     * @param tokens The stream of tokens representing the address.
+     *
+     * @exception AddressException
+     *                   if the mailbox is malformed or the token stream ends
+     *                   before the group is closed.
+     */
+    private void validateGroupMailbox(final TokenStream tokens) throws AddressException {
+        final AddressToken first = tokens.nextRealToken();
+
+        // a null address in the list: hand the delimiter back to the caller untouched.
+        switch (first.type) {
+            case COMMA:
+            case SEMICOLON:
+                tokens.pushToken(first);
+                return;
+            default:
+                break;
+        }
+
+        // scan forward from the first token until something tells us what sort of
+        // address this is.  Words (and anything else not listed) are stepped over.
+        for (AddressToken probe = first; ; probe = tokens.nextRealToken()) {
+            // a '<' means the full RFC822 mailbox form: an optional personal phrase
+            // followed by the address inside the angle brackets.
+            if (probe.type == LEFT_ANGLE) {
+                tokens.pushToken(first);
+                validatePhrase(tokens, false);
+                validateRouteAddr(tokens, true);
+                return;
+            }
+
+            // a '.' or '@' means we were inside a plain addr-spec; a ',' or ';' means the
+            // element ended after only words, which is one of the simple name forms.
+            // Either way, rewind and validate it as an address spec.
+            if (probe.type == PERIOD || probe.type == AT_SIGN
+                    || probe.type == COMMA || probe.type == SEMICOLON) {
+                tokens.pushToken(first);
+                validateAddressSpec(tokens);
+                return;
+            }
+
+            // running out of tokens means the group was never closed.
+            if (probe.type == END_OF_TOKENS) {
+                illegalAddress("Missing ';'", probe);
+            }
+        }
+    }
+
+
+    /**
+     * Utility method for throwing an AddressException caused by an
+     * unexpected primitive token.
+     *
+     * @param token  The token causing the problem (must not be a value type token).
+     *
+     * @exception AddressException
+     */
+    private void invalidToken(final AddressToken token) throws AddressException {
+        illegalAddress("Unexpected '" + token.type + "'", token);
+    }
+
+
+    /**
+     * Raise an error about illegal syntax.
+     *
+     * @param message  The message used in the thrown exception.
+     * @param position The parsing position within the string.
+     *
+     * @exception AddressException
+     */
+    private void syntaxError(final String message, final int position) throws AddressException
+    {
+        throw new AddressException(message, addresses, position);
+    }
+
+
+    /**
+     * Throw an exception based on the position of an invalid token.
+     *
+     * @param message The exception message.
+     * @param token   The token causing the error.  This tokens position is used
+     *                in the exception information.
+     */
+    private void illegalAddress(final String message, final AddressToken token) throws AddressException {
+        throw new AddressException(message, addresses, token.position);
+    }
+
+
+    /**
+     * Validate a phrase (a run of atoms and quoted strings) and step past it.
+     * On return the stream is positioned just AFTER the first token that is
+     * not part of the phrase; that terminating token is consumed, not pushed
+     * back.
+     *
+     * An optional phrase may be empty.  In that case the first token read is
+     * itself the terminator and nothing further is consumed.  (Previously one
+     * additional token was always read, so an empty phrase swallowed the
+     * token following the terminator and left the caller one token too far
+     * along.)
+     *
+     * @param tokens   The set of tokens to validate, positioned at the phrase start.
+     * @param required A flag indicating whether the phrase is optional or required.
+     *
+     * @exception AddressException
+     *                   if the phrase is required but the first token is
+     *                   neither an atom nor a quoted string.
+     */
+    private void validatePhrase(final TokenStream tokens, final boolean required) throws AddressException {
+        // we need to have at least one WORD token in the phrase...everything is optional
+        // after that.
+        AddressToken token = tokens.nextRealToken();
+        if (token.type != ATOM && token.type != QUOTED_LITERAL) {
+            if (required) {
+                illegalAddress("Missing group phrase", token);
+            }
+            // an empty optional phrase:  the token we just read is the terminator, so we
+            // are already positioned where the non-empty case would leave us.
+            return;
+        }
+
+        // now scan forward to the end of the phrase, consuming the terminating token.
+        do {
+            token = tokens.nextRealToken();
+        } while (token.type == ATOM || token.type == QUOTED_LITERAL);
+    }
+
+
+    /**
+     * validate a routeaddr specification
+     *
+     * @param tokens  The tokens representing the address portion (personal information
+     *                already removed).
+     * @param ingroup true indicates we're validating a route address inside a
+     *                group list.  false indicates we're validating a standalone
+     *                address.
+     *
+     * @exception AddressException
+     */
+    private void validateRouteAddr(final TokenStream tokens, final boolean ingroup) throws AddressException {
+        // get the next real token.
+        AddressToken token = tokens.nextRealToken();
+        // if this is an at sign, then we have a list of domains to parse.
+        if (token.type == AT_SIGN) {
+            // push the marker token back in for the route parser, and step past that part.
+            tokens.pushToken(token);
+            validateRoute(tokens);
+        }
+        else {
+            // we need to push this back on to validate the local part.
+            tokens.pushToken(token);
+        }
+
+        // now we expect to see an address spec.
+        validateAddressSpec(tokens);
+
+        token = tokens.nextRealToken();
+        if (ingroup) {
+            // if we're validating within a group specification, the angle brackets are still there (and
+            // required).
+            if (token.type != RIGHT_ANGLE) {
+                illegalAddress("Missing '>'", token);
+            }
+        }
+        else {
+            // the angle brackets were removed to make this an address, so we should be done.  Make sure we
+            // have a terminator here.
+            if (token.type != END_OF_TOKENS) {
+                illegalAddress("Illegal Address", token);
+            }
+        }
+    }
+
+
+
+    /**
+     * Validate a simple address in the form "user@domain".  The whole token
+     * stream must consist of one address spec and nothing else.
+     *
+     * @param tokens The stream of tokens representing the address.
+     *
+     * @exception AddressException
+     *                   if the address spec is invalid or is followed by
+     *                   anything other than the end-of-tokens marker.
+     */
+    private void validateSimpleAddress(final TokenStream tokens) throws AddressException {
+        // personal information has already been split off by the time validation runs,
+        // so the address spec starts at the very first token.
+        validateAddressSpec(tokens);
+
+        // the only acceptable follower is the terminator.
+        final AddressToken trailer = tokens.nextRealToken();
+        if (trailer.type == END_OF_TOKENS) {
+            return;
+        }
+        illegalAddress("Illegal Address", trailer);
+    }
+
+    /**
+     * Validate the addr-spec portion of an address.  RFC822 requires
+     * the form "local-part@domain", but javamail also allows a bare
+     * "local-part", so a domain is only demanded once an '@' has been seen.
+     * A token other than '@' following the local part is left on the stream.
+     *
+     * @param tokens The stream of tokens, positioned at the local part.
+     *
+     * @exception AddressException
+     *                   if the local part or the domain is invalid.
+     */
+    private void validateAddressSpec(final TokenStream tokens) throws AddressException {
+        // every address, even the simplest, starts with a local part.
+        validateLocalPart(tokens);
+
+        final AddressToken separator = tokens.nextRealToken();
+        if (separator.type != AT_SIGN) {
+            // no domain portion...give the token back so the caller can check termination.
+            tokens.pushToken(separator);
+            return;
+        }
+        validateDomain(tokens);
+    }
+
+
+    /**
+     * Validate the route portion of a route-addr.  This is a list
+     * of domain values in the form 1#("@" domain) ":".  Commas between
+     * elements are stepped over, and the terminating colon is consumed.
+     *
+     * @param tokens The token stream holding the address information.
+     *
+     * @exception AddressException
+     *                   if a domain is invalid or the list is not terminated
+     *                   by a colon.
+     */
+    private void validateRoute(final TokenStream tokens) throws AddressException {
+        for (;;) {
+            final AddressToken token = tokens.nextRealToken();
+            switch (token.type) {
+                // an element of the list...go parse off its domain.
+                case AT_SIGN:
+                    validateDomain(tokens);
+                    break;
+
+                // an element separator...just go around again.
+                case COMMA:
+                    break;
+
+                // the colon ends the route.
+                case COLON:
+                    return;
+
+                // anything else means the terminating colon never showed up.
+                default:
+                    illegalAddress("Missing ':'", token);
+                    break;
+            }
+        }
+    }
+
+
+    /**
+     * Parse the local part of an address spec.  The local part
+     * is a series of "words" separated by ".".
+     */
+    private void validateLocalPart(final TokenStream tokens) throws AddressException {
+        while (true) {
+            // get the token.
+            AddressToken token = tokens.nextRealToken();
+
+            // this must be either an atom or a literal.
+            if (token.type != ATOM && token.type != QUOTED_LITERAL) {
+                illegalAddress("Invalid local part", token);
+            }
+
+            // get the next token (white space and comments ignored)
+            token = tokens.nextRealToken();
+            // if this is a period, we continue parsing
+            if (token.type != PERIOD) {
+                tokens.pushToken(token);
+                // return the token
+                return;
+            }
+        }
+    }
+
+
+
+    /**
+     * Parse a domain name of the form sub-domain *("." sub-domain).
+     * a sub-domain is either an atom or a domain-literal.
+     */
+    private void validateDomain(final TokenStream tokens) throws AddressException {
+        while (true) {
+            // get the token.
+            AddressToken token = tokens.nextRealToken();
+
+            // this must be either an atom or a domain literal.
+            if (token.type != ATOM && token.type != DOMAIN_LITERAL) {
+                illegalAddress("Invalid domain", token);
+            }
+
+            // get the next token (white space is ignored)
+            token = tokens.nextRealToken();
+            // if this is a period, we continue parsing
+            if (token.type != PERIOD) {
+                // return the token
+                tokens.pushToken(token);
+                return;
+            }
+        }
+    }
+
+    /**
+     * Convert a list of word tokens into a phrase string.
+     *
+     * If the list is empty, the phrase is just a null value.
+     *
+     * If there is just a single token in the phrase, the token's raw
+     * value is returned directly, without quotes.  Any quoting that the
+     * value needs because of embedded special characters is applied
+     * later, when the value is converted to a string.  Thus the literal
+     *
+     *    "Geronimo"
+     *
+     * yields the value
+     *
+     *    Geronimo
+     *
+     * In multi-token phrases, each token is rendered with
+     * addTokenValue() and the results are joined with single blanks,
+     * so a phrase that comes in like this:
+     *
+     * "Geronimo" Apache
+     *
+     * is rebuilt token by token, with quoted literals passing through
+     * the quoted string formatter.
+     *
+     * @param tokens The stream of phrase tokens (which may be empty).
+     *
+     * @return The phrase string, or null if the stream holds no tokens.
+     */
+    private String personalToString(final TokenStream tokens) {
+
+        // no tokens in the stream?  This is a null value.
+        final AddressToken lead = tokens.nextToken();
+        if (lead.type == END_OF_TOKENS) {
+            return null;
+        }
+
+        // a single element phrase can be used directly...if it contains special
+        // characters, quoting will be performed when it's converted to a string value.
+        if (tokens.nextToken().type == END_OF_TOKENS) {
+            return lead.value;
+        }
+
+        // at least two tokens...reset to the beginning and build the phrase.
+        tokens.pushToken(lead);
+        final StringBuffer phrase = new StringBuffer();
+
+        // the first token goes in bare; each one after it is preceded by a blank.
+        addTokenValue(tokens.nextToken(), phrase);
+        for (AddressToken word = tokens.nextToken(); word.type != END_OF_TOKENS; word = tokens.nextToken()) {
+            phrase.append(' ');
+            addTokenValue(word, phrase);
+        }
+
+        // and return the canonicalized value
+        return phrase.toString();
+    }
+
+
+    /**
+     * Take a canonicalized set of address tokens and reformat it back into a string value,
+     * inserting whitespace where appropriate.  A blank is inserted only between two
+     * consecutive word tokens (atoms or quoted literals).
+     *
+     * @param tokens The set of tokens representing the address.
+     *
+     * @return The string value of the tokens.
+     */
+    private String addressToString(final TokenStream tokens) {
+        final StringBuffer text = new StringBuffer();
+
+        // remembers whether the token just written was a word.  Only a word directly
+        // following another word needs a blank in front of it.
+        boolean previousWasWord = false;
+
+        // nextToken() rather than nextRealToken(), since the comments need to be processed too.
+        for (AddressToken token = tokens.nextToken(); token.type != END_OF_TOKENS; token = tokens.nextToken()) {
+            switch (token.type) {
+                case ATOM:
+                case QUOTED_LITERAL:
+                    if (previousWasWord) {
+                        text.append(' ');
+                    }
+                    addTokenValue(token, text);
+                    previousWasWord = true;
+                    break;
+
+                // domain literals and comments are self delimiting, so no blank is
+                // needed on either side of them.
+                case DOMAIN_LITERAL:
+                case COMMENT:
+                    addTokenValue(token, text);
+                    previousWasWord = false;
+                    break;
+
+                // the type constants for the special characters are the character
+                // values themselves, so the type can be appended directly.
+                case LEFT_ANGLE:
+                case RIGHT_ANGLE:
+                case COMMA:
+                case COLON:
+                case AT_SIGN:
+                case SEMICOLON:
+                case PERIOD:
+                    text.append((char)token.type);
+                    previousWasWord = false;
+                    break;
+
+                // any other token type contributes nothing and leaves the flag alone.
+                default:
+                    break;
+            }
+        }
+        return text.toString();
+    }
+
+
+    /**
+     * Append the textual form of a value token to a string buffer used to
+     * create the canonicalized string value.  Atoms are appended as is,
+     * quoted literals go through formatQuotedString(), domain literals are
+     * wrapped in "[" and "]", and comments are wrapped in "(" and ")".
+     * Tokens of any other type append nothing.
+     *
+     * @param token  The token we're adding.
+     * @param buffer The target string buffer.
+     */
+    private void addTokenValue(final AddressToken token, final StringBuffer buffer) {
+        switch (token.type) {
+            case ATOM:
+                buffer.append(token.value);
+                break;
+
+            case QUOTED_LITERAL:
+                buffer.append(formatQuotedString(token.value));
+                break;
+
+            case DOMAIN_LITERAL:
+                buffer.append('[').append(token.value).append(']');
+                break;
+
+            case COMMENT:
+                buffer.append('(').append(token.value).append(')');
+                break;
+
+            default:
+                break;
+        }
+    }
+
+
+
+    private static final byte[] CHARMAP = {  // per-character flag bits, indexed by the 7-bit character code (128 entries)
+        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  0x06, 0x02, 0x06, 0x02, 0x02, 0x06, 0x02, 0x02,  // 0x00-0x0f: control; 0x08, 0x0a and 0x0d also carry the 0x04 bit
+        0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0x10-0x1f: control
+        0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,  0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,  // 0x20-0x2f: blank has only 0x04; specials are " ( ) , .
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x01, 0x00,  // 0x30-0x3f: specials are : ; < >
+
+        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x40-0x4f: special is @
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x00, 0x00,  // 0x50-0x5f: specials are [ \ ]
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x60-0x6f: no flags
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02,  // 0x70-0x7f: only DEL (0x7f) is flagged, as control
+    };  // NOTE(review): the 0x04 bit has no named flag in view; presumably a white space marker -- confirm
+
+    private static final byte FLG_SPECIAL = 1;  // CHARMAP bit: delimiter character that cannot appear in an atom
+    private static final byte FLG_CONTROL = 2;  // CHARMAP bit: control character (codes below 0x20, and 0x7f)
+
+    /**
+     * Quick test to see if a character is an allowed atom character
+     * or not.
+     *
+     * @param ch     The test character.
+     *
+     * @return true if this character is allowed in atoms, false for any
+     *         control characters, special characters, or blanks.
+     */
+    public static boolean isAtom(final char ch) {
+        if (ch > '\u007f') {
+            return false;
+        }
+        else if (ch == ' ') {
+            return false;
+        }
+        else {
+            return (CHARMAP[ch] & (FLG_SPECIAL | FLG_CONTROL)) == 0;
+        }
+    }
+
+    /**
+     * Tests one string to determine if it contains any of the
+     * characters in a supplied test string.
+     *
+     * @param s      The string we're testing.
+     * @param chars  The set of characters we're testing against.
+     *
+     * @return true if any of the characters is found, false otherwise.
+     */
+    public static boolean containsCharacters(final String s, final String chars)
+    {
+        for (int i = 0; i < s.length(); i++) {
+            if (chars.indexOf(s.charAt(i)) >= 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Tests if a string contains any non-special characters that
+     * would require encoding the value as a quoted string rather
+     * than a simple atom value.
+     *
+     * @param s      The test string.
+     *
+     * @return True if the string contains only blanks or allowed atom
+     *         characters.
+     */
+    public static boolean containsSpecials(final String s)
+    {
+        for (int i = 0; i < s.length(); i++) {
+            final char ch = s.charAt(i);
+            // must be either a blank or an allowed atom char.
+            if (ch == ' ' || isAtom(ch)) {
+                continue;
+            }
+            else {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Tests if a string contains any non-special characters that
+     * would require encoding the value as a quoted string rather
+     * than a simple atom value.
+     *
+     * @param s      The test string.
+     *
+     * @return True if the string contains only blanks or allowed atom
+     *         characters.
+     */
+    public static boolean isAtom(final String s)
+    {
+        for (int i = 0; i < s.length(); i++) {
+            final char ch = s.charAt(i);
+            // must be an allowed atom character
+            if (!isAtom(ch)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Apply RFC822 quoting rules to a literal string value, but only
+     * when quoting is actually needed.  If the string contains neither
+     * a backslash nor a double quote, and {@link #containsSpecials(String)}
+     * reports nothing that would rule out a plain series of
+     * blank-delimited atoms, the string is returned unchanged.
+     * Otherwise the value is wrapped in double quotes with every
+     * backslash and double quote escaped by a preceding backslash.
+     *
+     * @param s      The source string.
+     *
+     * @return Either the original string (when no quoting is required)
+     *         or a quoted, escaped version of it.
+     */
+    public static String quoteString(final String s) {
+        // only backslash and double quote require escaping inside the quotes.
+        final boolean needsEscaping = s.indexOf('\\') != -1 || s.indexOf('"') != -1;
+
+        // if the string is an atom (or a series of blank-delimited atoms), we can just return it directly.
+        if (!needsEscaping && !containsSpecials(s)) {
+            return s;
+        }
+
+        // the quoting and escaping itself is shared with formatQuotedString() so that
+        // the two methods cannot drift apart.
+        return formatQuotedString(s);
+    }
+
+    /**
+     * Apply RFC822 quoting rules to a literal string value.  Every
+     * backslash and double quote in the source is escaped with a
+     * preceding backslash, and the result is always enclosed in
+     * double quotes, even when nothing needed escaping.
+     *
+     * @param s      The source string.
+     *
+     * @return The string wrapped in double quotes, with backslashes and
+     *         double quotes escaped.
+     */
+    public static String formatQuotedString(final String s) {
+        // get a buffer sufficiently large for the string, two quote characters, and a "reasonable"
+        // number of escaped values.  The buffer never leaves this method, so the
+        // unsynchronized StringBuilder is sufficient.
+        final StringBuilder buffer = new StringBuilder(s.length() + 10);
+        buffer.append('"');
+
+        // a single pass covers both cases: when there is nothing to escape, the
+        // characters are simply copied between the quotes.
+        for (int i = 0; i < s.length(); i++) {
+            final char ch = s.charAt(i);
+            // only backslash and double quote require escaping.
+            if (ch == '\\' || ch == '"') {
+                buffer.append('\\');
+            }
+            buffer.append(ch);
+        }
+        buffer.append('"');
+        return buffer.toString();
+    }
+
+    public class TokenStream {
+        // the set of tokens in the parsed address list, as determined by RFC822 syntax rules.
+        private final List tokens;
+
+        // the current token position
+        int currentToken = 0;
+
+
+        /**
+         * Default constructor for a TokenStream.  This creates an
+         * empty TokenStream for purposes of tokenizing an address.
+         * It is the creator's responsibility to terminate the stream
+         * with a terminator token.
+         */
+        public TokenStream() {
+            tokens = new ArrayList();
+        }
+
+
+        /**
+         * Construct a TokenStream from a list of tokens.  A terminator
+         * token is added to the end.
+         *
+         * @param tokens An existing token list.
+         */
+        public TokenStream(final List tokens) {
+            this.tokens = tokens;
+            tokens.add(new AddressToken(END_OF_TOKENS, -1));
+        }
+
+        /**
+         * Add an address token to the token list.
+         *
+         * @param t      The new token to add to the list.
+         */
+        public void addToken(final AddressToken token) {
+            tokens.add(token);
+        }
+
+        /**
+         * Get the next token at the cursor position, advancing the
+         * position accordingly.
+         *
+         * @return The token at the current token position.
+         */
+        public AddressToken nextToken() {
+            AddressToken token = (AddressToken)tokens.get(currentToken++);
+            // we skip over white space tokens when operating in this mode, so
+            // check the token and iterate until we get a non-white space.
+            while (token.type == WHITESPACE) {
+                token = (AddressToken)tokens.get(currentToken++);
+            }
+            return token;
+        }
+
+
+        /**
+         * Get the next token at the cursor position, without advancing the
+         * position.
+         *
+         * @return The token at the current token position.
+         */
+        public AddressToken currentToken() {
+            // return the current token and step the cursor
+            return (AddressToken)tokens.get(currentToken);
+        }
+
+
+        /**
+         * Get the next non-comment token from the string.  Comments are ignored, except as personal information
+         * for very simple address specifications.
+         *
+         * @return A token guaranteed not to be a whitespace token.
+         */
+        public AddressToken nextRealToken()
+        {
+            AddressToken token = nextToken();
+            if (token.type == COMMENT) {
+                token = nextToken();
+            }
+            return token;
+        }
+
+        /**
+         * Push a token back on to the queue, making the index of this
+         * token the current cursor position.
+         *
+         * @param token  The token to push.
+         */
+        public void pushToken(final AddressToken token) {
+            // just reset the cursor to the token's index position.
+            currentToken = tokenIndex(token);
+        }
+
+        /**
+         * Get the next token after a given token, without advancing the
+         * token position.
+         *
+         * @param token  The token we're retrieving a token relative to.
+         *
+         * @return The next token in the list.
+         */
+        public AddressToken nextToken(final AddressToken token) {
+            return (AddressToken)tokens.get(tokenIndex(token) + 1);
+        }
+
+
+        /**
+         * Return the token prior to a given token.
+         *
+         * @param token  The token used for the index.
+         *
+         * @return The token prior to the index token in the list.
+         */
+        public AddressToken previousToken(final AddressToken token) {
+            return (AddressToken)tokens.get(tokenIndex(token) - 1);
+        }
+
+
+        /**
+         * Retrieve a token at a given index position.
+         *
+         * @param index  The target index.
+         */
+        public AddressToken getToken(final int index)
+        {
+            return (AddressToken)tokens.get(index);
+        }
+
+
+        /**
+         * Retrieve the index of a particular token in the stream.
+         *
+         * @param token  The target token.
+         *
+         * @return The index of the token within the stream.  Returns -1 if this
+         *         token is somehow not in the stream.
+         */
+        public int tokenIndex(final AddressToken token) {
+            return tokens.indexOf(token);
+        }
+
+
+        /**
+         * Extract a new TokenStream running from the start token to the
+         * token preceeding the end token.
+         *
+         * @param start  The starting token of the section.
+         * @param end    The last token (+1) for the target section.
+         *
+         * @return A new TokenStream object for processing this section of tokens.
+         */
+        public TokenStream section(final AddressToken start, final AddressToken end) {
+            final int startIndex = tokenIndex(start);
+            final int endIndex = tokenIndex(end);
+
+            // List.subList() returns a list backed by the original list.  Since we need to add a
+            // terminator token to this list when we take the sublist, we need to manually copy the
+            // references so we don't end up munging the original list.
+            final ArrayList list = new ArrayList(endIndex - startIndex + 2);
+
+            for (int i = startIndex; i <= endIndex; i++) {
+                list.add(tokens.get(i));
+            }
+            return new TokenStream(list);
+        }
+
+
+        /**
+         * Reset the token position back to the beginning of the
+         * stream.
+         */
+        public void reset() {
+            currentToken = 0;
+        }
+
+        /**
+         * Scan forward looking for a non-blank token.
+         *
+         * @return The first non-blank token in the stream.
+         */
+        public AddressToken getNonBlank()
+        {
+            AddressToken token = currentToken();
+            while (token.type == WHITESPACE) {
+                currentToken++;
+                token = currentToken();
+            }
+            return token;
+        }
+
+
+        /**
+         * Extract a blank delimited token from a TokenStream.  A blank
+         * delimited token is the set of tokens up to the next real whitespace
+         * token (comments not included).
+         *
+         * @return A TokenStream object with the new set of tokens.
+         */
+        public TokenStream getBlankDelimitedToken()
+        {
+            // get the next non-whitespace token.
+            final AddressToken first = getNonBlank();
+            // if this is the end, we return null.
+            if (first.type == END_OF_TOKENS) {
+                return null;
+            }
+
+            AddressToken last = first;
+
+            // the methods for retrieving tokens skip over whitespace, so we're going to process this
+            // by index.
+            currentToken++;
+
+            AddressToken token = currentToken();
+            while (true) {
+                // if this is our marker, then pluck out the section and return it.
+                if (token.type == END_OF_TOKENS || token.type == WHITESPACE) {
+                    return section(first, last);
+                }
+                last = token;
+                currentToken++;
+                // we accept any and all tokens here.
+                token = currentToken();
+            }
+        }
+
+        /**
+         * Return the index of the current cursor position.
+         *
+         * @return The integer index of the current token.
+         */
+        public int currentIndex() {
+            return currentToken;
+        }
+
+        public void dumpTokens()
+        {
+            System.out.println(">>>>>>>>> Start dumping TokenStream tokens");
+            for (int i = 0; i < tokens.size(); i++) {
+                System.out.println("-------- Token: " + tokens.get(i));
+            }
+
+            System.out.println("++++++++ cursor position=" + currentToken);
+            System.out.println(">>>>>>>>> End dumping TokenStream tokens");
+        }
+    }
+
+
+    /**
+     * Small holder describing a single token: its type code, an
+     * optional string value, and its position.
+     */
+    public class AddressToken {
+
+        // the token type
+        int type;
+
+        // string value of the token (can be null)
+        String value;
+
+        // position of the token within the address string.
+        int position;
+
+        /**
+         * Create a token that carries no string value.
+         *
+         * @param type     The token type.
+         * @param position The position of the token within the address string.
+         */
+        AddressToken(final int type, final int position)
+        {
+            this(null, type, position);
+        }
+
+        /**
+         * Create a token with a string value.
+         *
+         * @param value    The string value of the token (can be null).
+         * @param type     The token type.
+         * @param position The position of the token within the address string.
+         */
+        AddressToken(final String value, final int type, final int position)
+        {
+            this.value = value;
+            this.type = type;
+            this.position = position;
+        }
+
+        /**
+         * Produce a printable description of the token.  The type is
+         * rendered as a character, except for the terminator token,
+         * which is spelled out by name; the value is appended only
+         * when there is one.
+         *
+         * @return The printable description.
+         */
+        @Override
+        public String toString()
+        {
+            // the terminator has no meaningful character form, so name it.
+            if (type == END_OF_TOKENS) {
+                return "AddressToken:  type=END_OF_TOKENS";
+            }
+            final String description = "AddressToken:  type=" + (char)type;
+            return value == null ? description : description + " value=" + value;
+        }
+    }
+}
+

Added: geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentDisposition.java
URL: http://svn.apache.org/viewvc/geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentDisposition.java?rev=1620683&view=auto
==============================================================================
--- geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentDisposition.java (added)
+++ geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentDisposition.java Tue Aug 26 18:17:06 2014
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package javax.mail.internet;
+
+// http://www.faqs.org/rfcs/rfc2183.html
+
+/**
+ * Holds a content disposition value: a disposition string plus an
+ * optional list of parameters.
+ *
+ * @version $Rev$ $Date$
+ */
+public class ContentDisposition {
+    // the text that precedes the parameters on a Content-Disposition header
+    private static final String HEADER_PREFIX = "Content-Disposition: ";
+
+    private String _disposition;
+    private ParameterList _list;
+
+    /**
+     * Create a ContentDisposition with no disposition string and an
+     * empty parameter list.
+     */
+    public ContentDisposition() {
+        setDisposition(null);
+        setParameterList(null);
+    }
+
+    /**
+     * Create a ContentDisposition by parsing a header value.
+     *
+     * @param disposition
+     *               The string to parse.  The first token must be an atom,
+     *               which becomes the disposition; anything after it is
+     *               handed to ParameterList for parsing.
+     *
+     * @exception ParseException
+     *               If the first token is not an atom.
+     */
+    public ContentDisposition(final String disposition) throws ParseException {
+        // get a token parser for the type information
+        final HeaderTokenizer tokenizer = new HeaderTokenizer(disposition, HeaderTokenizer.MIME);
+
+        // get the first token, which must be an ATOM
+        final HeaderTokenizer.Token token = tokenizer.next();
+        if (token.getType() != HeaderTokenizer.Token.ATOM) {
+            throw new ParseException("Invalid content disposition");
+        }
+
+        _disposition = token.getValue();
+
+        // the remainder is parameters, which ParameterList will take care of parsing.
+        final String remainder = tokenizer.getRemainder();
+        if (remainder != null) {
+            _list = new ParameterList(remainder);
+        }
+    }
+
+    /**
+     * Create a ContentDisposition from its parts.
+     *
+     * @param disposition
+     *               The disposition string.
+     * @param list   The parameter list.  A null list is replaced by an
+     *               empty one.
+     */
+    public ContentDisposition(final String disposition, final ParameterList list) {
+        setDisposition(disposition);
+        setParameterList(list);
+    }
+
+    /**
+     * @return The disposition string (may be null).
+     */
+    public String getDisposition() {
+        return _disposition;
+    }
+
+    /**
+     * Look up a single parameter.
+     *
+     * @param name   The parameter name.
+     *
+     * @return Whatever the parameter list returns for the name, or null
+     *         if there is no parameter list.
+     */
+    public String getParameter(final String name) {
+        return _list == null ? null : _list.get(name);
+    }
+
+    /**
+     * @return The parameter list.  This is null when the instance was
+     *         parsed from a string that had nothing after the
+     *         disposition.
+     */
+    public ParameterList getParameterList() {
+        return _list;
+    }
+
+    /**
+     * Replace the disposition string.
+     *
+     * @param string The new disposition string.
+     */
+    public void setDisposition(final String string) {
+        _disposition = string;
+    }
+
+    /**
+     * Set a single parameter, creating the parameter list first if
+     * there is none yet.
+     *
+     * @param name   The parameter name.
+     * @param value  The parameter value.
+     */
+    public void setParameter(final String name, final String value) {
+        if (_list == null) {
+            _list = new ParameterList();
+        }
+        _list.set(name, value);
+    }
+
+    /**
+     * Replace the parameter list.
+     *
+     * @param list   The new list.  Passing null installs a new, empty
+     *               parameter list rather than clearing the field.
+     */
+    public void setParameterList(final ParameterList list) {
+        _list = (list == null) ? new ParameterList() : list;
+    }
+
+    /**
+     * Retrieve a RFC2045 style string representation of
+     * this ContentDisposition. Returns an empty string if
+     * the conversion failed.
+     *
+     * @return  RFC2045 style string
+     * @since       JavaMail 1.2
+     */
+    @Override
+    public String toString() {
+
+        /* Since JavaMail 1.5:
+           The general contract of Object.toString is that it never returns null.
+           The toString methods of ContentType and ContentDisposition were defined
+           to return null in certain error cases.  Given the general toString contract
+           it seems unlikely that anyone ever depended on these special cases, and
+           it would be more useful for these classes to obey the general contract.
+           These methods have been changed to return an empty string in these error
+           cases.
+        */
+
+        // it is possible we might have a parameter list, but this is meaningless if
+        // there is no disposition string.  Return a failure.
+        if (_disposition == null) {
+            return "";
+        }
+
+        // no parameter list?  Just return the disposition string
+        if (_list == null) {
+            return _disposition;
+        }
+
+        // format this for use on a Content-Disposition header, which means we need to
+        // account for the length of the header part too.  That part includes the
+        // ": " separator, matching the way ContentType.toString() counts "Content-Type: ".
+        return _disposition + _list.toString(HEADER_PREFIX.length() + _disposition.length());
+    }
+}

Added: geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentType.java
URL: http://svn.apache.org/viewvc/geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentType.java?rev=1620683&view=auto
==============================================================================
--- geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentType.java (added)
+++ geronimo/specs/trunk/geronimo-javamail_1.5_spec/src/main/java/javax/mail/internet/ContentType.java Tue Aug 26 18:17:06 2014
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package javax.mail.internet;
+
+
+// can be in the form major/minor; charset=jobby
+
+/**
+ * Holds a MIME content type: a primary (major) type, a sub (minor)
+ * type, and an optional list of parameters.
+ *
+ * @version $Rev$ $Date$
+ */
+public class ContentType {
+    private ParameterList _list;
+    private String _minor;
+    private String _major;
+
+    /**
+     * Create an empty ContentType.  All parts are left null.
+     */
+    public ContentType() {
+        // the Sun version makes everything null here.
+    }
+
+    /**
+     * Create a ContentType from its parts.  The arguments are stored
+     * as given, without validation.
+     *
+     * @param major  The primary type.
+     * @param minor  The sub type.
+     * @param list   The parameter list (may be null).
+     */
+    public ContentType(final String major, final String minor, final ParameterList list) {
+        _major = major;
+        _minor = minor;
+        _list = list;
+    }
+
+    /**
+     * Create a ContentType by parsing a header value of the form
+     * major/minor followed by optional parameters.
+     *
+     * @param type   The string to parse.
+     *
+     * @exception ParseException
+     *               If the string does not start with an atom, a '/',
+     *               and another atom.
+     */
+    public ContentType(final String type) throws ParseException {
+        // get a token parser for the type information
+        final HeaderTokenizer tokenizer = new HeaderTokenizer(type, HeaderTokenizer.MIME);
+
+        // get the first token, which must be an ATOM
+        HeaderTokenizer.Token token = tokenizer.next();
+        if (token.getType() != HeaderTokenizer.Token.ATOM) {
+            throw new ParseException("Invalid content type");
+        }
+
+        _major = token.getValue();
+
+        // the MIME type must be major/minor
+        token = tokenizer.next();
+        if (token.getType() != '/') {
+            throw new ParseException("Invalid content type");
+        }
+
+
+        // this must also be an atom.  Content types are not permitted to be wild cards.
+        token = tokenizer.next();
+        if (token.getType() != HeaderTokenizer.Token.ATOM) {
+            throw new ParseException("Invalid content type");
+        }
+
+        _minor = token.getValue();
+
+        // the remainder is parameters, which ParameterList will take care of parsing.
+        final String remainder = tokenizer.getRemainder();
+        if (remainder != null) {
+            _list = new ParameterList(remainder);
+        }
+    }
+
+    /**
+     * @return The primary type (may be null).
+     */
+    public String getPrimaryType() {
+        return _major;
+    }
+
+    /**
+     * @return The sub type (may be null).
+     */
+    public String getSubType() {
+        return _minor;
+    }
+
+    /**
+     * @return The primary type and sub type joined by a '/'.  Null parts
+     *         are not filtered out here, so they show up as the text "null".
+     */
+    public String getBaseType() {
+        return _major + "/" + _minor;
+    }
+
+    /**
+     * Look up a single parameter.
+     *
+     * @param name   The parameter name.
+     *
+     * @return Whatever the parameter list returns for the name, or null
+     *         if there is no parameter list.
+     */
+    public String getParameter(final String name) {
+        return (_list == null ? null : _list.get(name));
+    }
+
+    /**
+     * @return The parameter list (may be null).
+     */
+    public ParameterList getParameterList() {
+        return _list;
+    }
+
+    /**
+     * Replace the primary type.
+     *
+     * @param major  The new primary type.
+     */
+    public void setPrimaryType(final String major) {
+        _major = major;
+    }
+
+    /**
+     * Replace the sub type.
+     *
+     * @param minor  The new sub type.
+     */
+    public void setSubType(final String minor) {
+        _minor = minor;
+    }
+
+    /**
+     * Set a single parameter, creating the parameter list first if
+     * there is none yet.
+     *
+     * @param name   The parameter name.
+     * @param value  The parameter value.
+     */
+    public void setParameter(final String name, final String value) {
+        if (_list == null) {
+            _list = new ParameterList();
+        }
+        _list.set(name, value);
+    }
+
+    /**
+     * Replace the parameter list.
+     *
+     * @param list   The new parameter list (may be null).
+     */
+    public void setParameterList(final ParameterList list) {
+        _list = list;
+    }
+
+    /**
+     * Retrieve a RFC2045 style string representation of
+     * this Content-Type. Returns an empty string if
+     * the conversion failed.
+     *
+     * @return  RFC2045 style string
+     */
+    @Override
+    public String toString() {
+
+        /* Since JavaMail 1.5:
+        The general contract of Object.toString is that it never returns null.
+        The toString methods of ContentType and ContentDisposition were defined
+        to return null in certain error cases.  Given the general toString contract
+        it seems unlikely that anyone ever depended on these special cases, and
+        it would be more useful for these classes to obey the general contract.
+        These methods have been changed to return an empty string in these error
+        cases.
+        */
+
+        if (_major == null || _minor == null) {
+            return "";
+        }
+
+        // We need to format this as if we're doing it to set into the Content-Type
+        // header.  So the parameter list gets added on as if the header name was
+        // also included.
+        String baseType = getBaseType();
+
+        // getBaseType() as implemented in this class never returns null, but the
+        // method can be overridden, so the check is kept as a safeguard.
+        if (baseType == null) {
+            return "";
+        }
+
+        if (_list != null) {
+            baseType += _list.toString(baseType.length() + "Content-Type: ".length());
+        }
+
+        return baseType;
+    }
+
+    /**
+     * Compare this content type with another one.  The primary types
+     * must be equal ignoring case; the sub types must either be equal
+     * ignoring case, or one of them must be the wild card "*".
+     * Parameters are not considered.
+     *
+     * @param other  The content type to compare against.
+     *
+     * @return true if the types match; false if they do not, if other
+     *         is null, or if this object's primary type or sub type is null.
+     */
+    public boolean match(final ContentType other) {
+        if (other == null || _major == null || _minor == null) {
+            return false;
+        }
+
+        // the literal is used as the receiver for the wild card tests so that a
+        // null sub type on the other side gives "no match" rather than an exception.
+        return _major.equalsIgnoreCase(other._major)
+                && (_minor.equalsIgnoreCase(other._minor)
+                || "*".equals(_minor)
+                || "*".equals(other._minor));
+    }
+
+    /**
+     * Compare this content type with one given in string form.
+     *
+     * @param contentType
+     *               The content type string to parse and compare against.
+     *
+     * @return The result of match(ContentType) on the parsed value, or
+     *         false if the string cannot be parsed.
+     */
+    public boolean match(final String contentType) {
+        try {
+            return match(new ContentType(contentType));
+        } catch (final ParseException e) {
+            // a string that is not a valid content type cannot match anything.
+            return false;
+        }
+    }
+}