Posted to commits@flex.apache.org by cd...@apache.org on 2016/04/21 13:35:47 UTC

[17/19] git commit: [flex-falcon] [refs/heads/feature/maven-migration-test] - - Renamed the "generate" goal to "generate-extern" - Replicated the changes to the node extern to match the develop branch - Fixed the replace commands in js to generate an ide

http://git-wip-us.apache.org/repos/asf/flex-falcon/blob/dd503343/compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
----------------------------------------------------------------------
diff --cc compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
index 61b6e26,0000000..1c90e94
mode 100644,000000..100644
--- a/compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
+++ b/compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
@@@ -1,1871 -1,0 +1,1884 @@@
 +/*
 + *
 + *  Licensed to the Apache Software Foundation (ASF) under one or more
 + *  contributor license agreements.  See the NOTICE file distributed with
 + *  this work for additional information regarding copyright ownership.
 + *  The ASF licenses this file to You under the Apache License, Version 2.0
 + *  (the "License"); you may not use this file except in compliance with
 + *  the License.  You may obtain a copy of the License at
 + *
 + *      http://www.apache.org/licenses/LICENSE-2.0
 + *
 + *  Unless required by applicable law or agreed to in writing, software
 + *  distributed under the License is distributed on an "AS IS" BASIS,
 + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + *  See the License for the specific language governing permissions and
 + *  limitations under the License.
 + *
 + */
 +
 +package org.apache.flex.compiler.internal.parsing.as;
 +
 +import java.io.Closeable;
 +import java.io.FileNotFoundException;
 +import java.io.IOException;
 +import java.io.Reader;
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Stack;
 +import java.util.regex.Matcher;
 +import java.util.regex.Pattern;
 +
 +import org.apache.flex.compiler.clients.ASC;
 +import org.apache.flex.compiler.constants.IASKeywordConstants;
 +import org.apache.flex.compiler.filespecs.IFileSpecification;
 +import org.apache.flex.compiler.internal.parsing.ITokenStreamFilter;
 +import org.apache.flex.compiler.internal.parsing.SourceFragmentsReader;
 +import org.apache.flex.compiler.internal.parsing.TokenBase;
 +import org.apache.flex.compiler.internal.units.ASCompilationUnit;
 +import org.apache.flex.compiler.parsing.IASToken;
 +import org.apache.flex.compiler.parsing.IASTokenizer;
 +import org.apache.flex.compiler.parsing.IASToken.ASTokenKind;
 +import org.apache.flex.compiler.problems.CyclicalIncludesProblem;
 +import org.apache.flex.compiler.problems.ExpectXmlBeforeNamespaceProblem;
 +import org.apache.flex.compiler.problems.FileNotFoundProblem;
 +import org.apache.flex.compiler.problems.ICompilerProblem;
 +import org.apache.flex.compiler.problems.InternalCompilerProblem2;
 +import org.apache.flex.compiler.problems.UnexpectedTokenProblem;
 +import org.apache.flex.utils.ILengthAwareReader;
 +import org.apache.flex.utils.NonLockingStringReader;
 +import org.apache.flex.utils.ILengthAwareReader.InputType;
 +import com.google.common.collect.ImmutableList;
 +import com.google.common.collect.ImmutableMap;
 +
 +/**
 + * This Tokenizer provides tokens to be used by various clients, most notably
 + * the ASParser. Given the nature of ambiguities in the ActionScript 3 language,
 + * this tokenizer also serves to disambiguate tokens based on a combination of
 + * lookbehind and lookahead. For all cases of ambiguity, only one token of
 + * lookbehind is needed, and in the worst case, n tokens of lookahead, where n
 + * is the number of tokens that can be produced. Some other state is kept in
 + * order to know which type of container we are in (function, class, interface,
 + * etc.). Lookahead results are buffered to avoid unneeded lookahead.
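 + * <p>
 + * A typical usage (illustrative sketch; error handling omitted):
 + * <pre>
 + * StreamingASTokenizer tokenizer =
 + *         StreamingASTokenizer.createForRepairingASTokenizer(reader, path, null);
 + * for (ASToken token = tokenizer.next(); token != null; token = tokenizer.next())
 + * {
 + *     // process the token...
 + * }
 + * tokenizer.close();
 + * </pre>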
 + */
 +public class StreamingASTokenizer implements ASTokenTypes, IASTokenizer, Closeable
 +{
 +    private static final String FOR_EACH = "for each";
 +    private static final String XML = "xml";
 +    private static final String DEFAULT_XML_NAMESPACE = "default xml namespace";
 +    private static final String ZERO = "0";
 +
 +    /**
 +     * Map from keyword text to token type.
 +     * <p>
 +     * We use a HashMap here to avoid slowing down the performance of the
 +     * underlying lexer. We avoid the "longest match" problem, which would
 +     * require a lot of rescanning at the lexer level to distinguish keywords
 +     * from identifiers. And since hash map lookup is constant-time, this is
 +     * (in theory) faster than doing this in the scanner, since we're not
 +     * bound by I/O or state-machine backtracking.
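 +     * <p>
 +     * For example, {@code keywordToTokenMap.get("class")} yields
 +     * {@code TOKEN_KEYWORD_CLASS} with a single hash lookup.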
 +     */
 +    private static final Map<String, Integer> keywordToTokenMap = new ImmutableMap.Builder<String, Integer>()
 +            .put(IASKeywordConstants.AS, TOKEN_KEYWORD_AS)
 +            .put(IASKeywordConstants.IS, TOKEN_KEYWORD_IS)
 +            .put(IASKeywordConstants.INSTANCEOF, TOKEN_KEYWORD_INSTANCEOF)
 +            .put(IASKeywordConstants.IN, TOKEN_KEYWORD_IN)
 +            .put(IASKeywordConstants.DELETE, TOKEN_KEYWORD_DELETE)
 +            .put(IASKeywordConstants.TYPEOF, TOKEN_KEYWORD_TYPEOF)
 +            .put(IASKeywordConstants.CONST, TOKEN_KEYWORD_CONST)
 +            .put(IASKeywordConstants.GET, TOKEN_RESERVED_WORD_GET)
 +            .put(IASKeywordConstants.IMPLEMENTS, TOKEN_RESERVED_WORD_IMPLEMENTS)
 +            .put(IASKeywordConstants.IMPORT, TOKEN_KEYWORD_IMPORT)
 +            .put(IASKeywordConstants.USE, TOKEN_KEYWORD_USE)
 +            .put(IASKeywordConstants.EXTENDS, TOKEN_RESERVED_WORD_EXTENDS)
 +            .put(IASKeywordConstants.NEW, TOKEN_KEYWORD_NEW)
 +            .put(IASKeywordConstants.DYNAMIC, TOKEN_MODIFIER_DYNAMIC)
 +            .put(IASKeywordConstants.FINAL, TOKEN_MODIFIER_FINAL)
 +            .put(IASKeywordConstants.NATIVE, TOKEN_MODIFIER_NATIVE)
 +            .put(IASKeywordConstants.OVERRIDE, TOKEN_MODIFIER_OVERRIDE)
 +            .put(IASKeywordConstants.STATIC, TOKEN_MODIFIER_STATIC)
 +            .put(IASKeywordConstants.VIRTUAL, TOKEN_MODIFIER_VIRTUAL)
 +            .put(IASKeywordConstants.SET, TOKEN_RESERVED_WORD_SET)
 +            // Keywords with special token types that affect subsequent blocks
 +            .put(IASKeywordConstants.CATCH, TOKEN_KEYWORD_CATCH)
 +            .put(IASKeywordConstants.CLASS, TOKEN_KEYWORD_CLASS)
 +            .put(IASKeywordConstants.FUNCTION, TOKEN_KEYWORD_FUNCTION)
 +            .put(IASKeywordConstants.INTERFACE, TOKEN_KEYWORD_INTERFACE)
 +            .put(IASKeywordConstants.PACKAGE, TOKEN_KEYWORD_PACKAGE)
 +            // #120009: allow "var" inside parameter list, even though it's not
 +            // valid AS (don't turn the subsequent function block open into a block open)
 +            .put(IASKeywordConstants.VAR, TOKEN_KEYWORD_VAR)
 +            .put(IASKeywordConstants.FALSE, TOKEN_KEYWORD_FALSE)
 +            .put(IASKeywordConstants.NULL, TOKEN_KEYWORD_NULL)
 +            .put(IASKeywordConstants.TRUE, TOKEN_KEYWORD_TRUE)
 +            .put(IASKeywordConstants.PUBLIC, HIDDEN_TOKEN_BUILTIN_NS)
 +            .put(IASKeywordConstants.PRIVATE, HIDDEN_TOKEN_BUILTIN_NS)
 +            .put(IASKeywordConstants.PROTECTED, HIDDEN_TOKEN_BUILTIN_NS)
 +            .put(IASKeywordConstants.INTERNAL, HIDDEN_TOKEN_BUILTIN_NS)
 +            .put(IASKeywordConstants.INCLUDE, TOKEN_KEYWORD_INCLUDE)
 +            // Keywords for statements that affect subsequent blocks
 +            .put(IASKeywordConstants.DO, TOKEN_KEYWORD_DO)
 +            .put(IASKeywordConstants.WHILE, TOKEN_KEYWORD_WHILE)
 +            .put(IASKeywordConstants.BREAK, TOKEN_KEYWORD_BREAK)
 +            .put(IASKeywordConstants.CONTINUE, TOKEN_KEYWORD_CONTINUE)
 +            .put(IASKeywordConstants.GOTO, TOKEN_RESERVED_WORD_GOTO)
 +            .put(IASKeywordConstants.FOR, TOKEN_KEYWORD_FOR)
 +            .put(StreamingASTokenizer.FOR_EACH, TOKEN_KEYWORD_FOR)
 +            .put(IASKeywordConstants.EACH, TOKEN_RESERVED_WORD_EACH)
 +            .put(IASKeywordConstants.WITH, TOKEN_KEYWORD_WITH)
 +            .put(IASKeywordConstants.ELSE, TOKEN_KEYWORD_ELSE)
 +            .put(IASKeywordConstants.IF, TOKEN_KEYWORD_IF)
 +            .put(IASKeywordConstants.SWITCH, TOKEN_KEYWORD_SWITCH)
 +            .put(IASKeywordConstants.CASE, TOKEN_KEYWORD_CASE)
 +            .put(IASKeywordConstants.DEFAULT, TOKEN_KEYWORD_DEFAULT)
 +            .put(IASKeywordConstants.TRY, TOKEN_KEYWORD_TRY)
 +            .put(IASKeywordConstants.FINALLY, TOKEN_KEYWORD_FINALLY)
 +            // Keywords with a generic keyword token type that have no effect 
 +            // on subsequent blocks.
 +            .put(IASKeywordConstants.NAMESPACE, TOKEN_RESERVED_WORD_NAMESPACE)
 +            .put(IASKeywordConstants.CONFIG, TOKEN_RESERVED_WORD_CONFIG)
 +            .put(IASKeywordConstants.THROW, TOKEN_KEYWORD_THROW)
 +            .put(IASKeywordConstants.SUPER, TOKEN_KEYWORD_SUPER)
 +            .put(IASKeywordConstants.THIS, TOKEN_KEYWORD_THIS)
 +            .put(IASKeywordConstants.VOID, TOKEN_KEYWORD_VOID)
 +            .put(IASKeywordConstants.RETURN, TOKEN_KEYWORD_RETURN)
 +            .build();
 +
 +    /**
 +     * Configuration for our tokenizer.
 +     */
 +    private static final class TokenizerConfig
 +    {
 +        /**
 +         * Flag that lets us ignore keywords for more general string parsing
 +         */
 +        public boolean ignoreKeywords = false;
 +
 +        /**
 +         * Flag that lets us be aware of metadata
 +         */
 +        public boolean findMetadata = true;
 +
 +        /**
 +         * Flag indicating that we are tokenizing full content/files, and not
 +         * segments
 +         */
 +        public boolean completeContent = true;
 +
 +        /**
 +         * Filter for old APIs
 +         */
 +        public ITokenStreamFilter filter;
 +
 +        /**
 +         * Flag indicating we should collect comments
 +         */
 +        public boolean collectComments = false;
 +
 +        /**
 +         * Flag indicating we follow include statements, including their tokens
 +         */
 +        public boolean followIncludes = true;
 +    }
 +
 +    private Reader reader;
 +
 +    //underlying lexer
 +    private RawASTokenizer tokenizer;
 +
 +    //last exception to prevent us from looping forever
 +    private Exception lastException = null;
 +
 +    //LA buffer
 +    private final List<ASToken> lookAheadBuffer;
 +    private int bufferSize = 0; //maintain size ourselves since it's faster
 +
 +    //last token we encountered, used for lookback
 +    private ASToken lastToken;
 +
 +    private int offsetAdjustment; //for offset adjustment
 +    private int lineAdjustment = 0;
 +    private int columnAdjustment = 0;
 +
 +    private IncludeHandler includeHandler;
 +
 +    /**
 +     * The forked tokenizer for included files. If not null, {@link #next()}
 +     * will return a token from this tokenizer.
 +     * <p>
 +     * After all the tokens are returned from the included source file,
 +     * {@link #closeIncludeTokenizer()} closes the tokenizer and sets this
 +     * field to null.
 +     */
 +    private StreamingASTokenizer forkIncludeTokenizer;
 +
 +    /**
 +     * Flag to indicate if we have followed include statements
 +     */
 +    private boolean hasEncounteredIncludeStatements = false;
 +
 +    private TokenizerConfig config;
 +
 +    /**
 +     * Path of the source file. This is used for resolving included file
 +     * paths. {@link #StreamingASTokenizer(IFileSpecification)} and
 +     * {@link #StreamingASTokenizer(IFileSpecification, Stack)} set the value.
 +     */
 +    private String sourcePath;
 +
 +    /**
 +     * Lexer problems.
 +     */
 +    private final List<ICompilerProblem> problems = new ArrayList<ICompilerProblem>();
 +
 +    /**
 +     * Imaginary tokens generated for the {@code asc -in} option.
 +     */
 +    private Iterator<ASToken> ascIncludeImaginaryTokens;
 +
 +    /**
 +     * You should probably not use this constructor. There is some legacy code
 +     * that uses this constructor, but that code should be updated to use one of
 +     * the static create methods below.
 +     * <p>
 +     * TODO: make this private.
 +     */
 +    public StreamingASTokenizer(final Reader reader)
 +    {
 +        this();
 +        setReader(reader);
 +    }
 +
 +    /**
 +     * A pool to reduce duplicated string literals created
 +     */
 +    private final HashMap<String, String> stringPool;
 +
 +    /**
 +     * You should probably not use this constructor. There is a lot of code that
 +     * uses this constructor, but that code should be updated to use one of the
 +     * static create methods below.
 +     * <p>
 +     * TODO: make this private.
 +     */
 +    public StreamingASTokenizer()
 +    {
 +        tokenizer = new RawASTokenizer();
 +        config = new TokenizerConfig();
 +        lookAheadBuffer = new ArrayList<ASToken>(5);
 +        includeHandler = IncludeHandler.creatDefaultIncludeHandler();
 +        stringPool = new HashMap<String, String>();
 +
 +        // Initialize string pool with keyword strings. The keyword strings 
 +        // are declared as constants which are automatically "interned".
 +        for (final String keyword : keywordToTokenMap.keySet())
 +        {
 +            stringPool.put(keyword, keyword);
 +        }
 +    }
 +
 +    /**
 +     * Creates a tokenizer suitable for the mxml indexing code.
 +     * 
 +     * @param fileName Path of the file whose script fragments the new
 +     * tokenizer will tokenize.
 +     * 
 +     * @return A new tokenizer suitable for tokenizing script fragments in an
 +     * mxml document that is being tokenized for the full text search index.
 +     */
 +    public static StreamingASTokenizer createForMXMLIndexing(String fileName)
 +    {
 +        StreamingASTokenizer result = new StreamingASTokenizer();
 +        result.setPath(fileName);
 +        result.includeHandler.enterFile(result.sourcePath);
 +        return result;
 +    }
 +
 +    /**
 +     * Fork a new tokenizer when an "include" directive is found. This method
 +     * will pass the {@code StructureTracker} of the current tokenizer down to
 +     * the forked tokenizer.
 +     * 
 +     * @param currentTokenizer Current tokenizer.
 +     * @param fileSpec File specification of the included file.
 +     * @param includeHandler Include handler.
 +     * @return A tokenizer for the included file.
 +     * @throws FileNotFoundException Error.
 +     */
 +    private static StreamingASTokenizer createForIncludeFile(
 +            final StreamingASTokenizer currentTokenizer,
 +            final IFileSpecification fileSpec,
 +            final IncludeHandler includeHandler)
 +            throws FileNotFoundException
 +    {
 +        final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
 +        return tokenizer;
 +    }
 +
 +    /**
 +     * Create a tokenizer from a source file. This is the lexer entry-point used
 +     * by {@link ASCompilationUnit}.
 +     * 
 +     * @param fileSpec File specification provides the reader and the file path.
 +     * @param includeHandler Include handler.
 +     * @throws FileNotFoundException error
 +     */
 +    protected static StreamingASTokenizer create(
 +            final IFileSpecification fileSpec,
 +            final IncludeHandler includeHandler)
 +            throws FileNotFoundException
 +    {
 +        assert fileSpec != null : "File specification can't be null.";
 +        assert includeHandler != null : "Include handler can't be null.";
 +
 +        final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
 +        tokenizer.setReader(fileSpec.createReader());
 +        tokenizer.setPath(fileSpec.getPath());
 +        tokenizer.includeHandler = includeHandler;
 +        tokenizer.includeHandler.enterFile(tokenizer.sourcePath);
 +        return tokenizer;
 +    }
 +
 +    /**
 +     * Create a tokenizer for {@code ASParser#parseFile()}.
 +     * 
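 +     * For each entry in {@code includedFiles}, a pair of imaginary tokens
 +     * ({@code include} followed by the quoted file name) is synthesized, as
 +     * if the corresponding include directives had been written in the
 +     * source. This backs the {@code asc -in} option.
 +     * 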
 +     * @param fileSpec File specification provides the reader and the file path.
 +     * @param includeHandler Include handler.
 +     * @param followIncludes True if included files are also parsed.
 +     * @param includedFiles A list of included file paths.
 +     * @return Lexer.
 +     * @throws FileNotFoundException error
 +     */
 +    protected static StreamingASTokenizer createForASParser(
 +            final IFileSpecification fileSpec,
 +            final IncludeHandler includeHandler,
 +            final boolean followIncludes,
 +            final List<String> includedFiles)
 +            throws FileNotFoundException
 +    {
 +        final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
 +        tokenizer.setFollowIncludes(followIncludes);
 +
 +        final ImmutableList.Builder<ASToken> imaginaryTokensBuilder =
 +                new ImmutableList.Builder<ASToken>();
 +        for (final String filename : includedFiles)
 +        {
 +            imaginaryTokensBuilder.add(new ASToken(
 +                    ASTokenTypes.TOKEN_KEYWORD_INCLUDE,
 +                    0,
 +                    0,
 +                    0,
 +                    0,
 +                    "include"));
 +            imaginaryTokensBuilder.add(new ASToken(
 +                    ASTokenTypes.TOKEN_LITERAL_STRING,
 +                    0,
 +                    0,
 +                    0,
 +                    0,
 +                    '"' + filename + '"'));
 +        }
 +        tokenizer.ascIncludeImaginaryTokens = imaginaryTokensBuilder.build().iterator();
 +        return tokenizer;
 +    }
 +
 +    /**
 +     * This creator doesn't "enter file" on creation.
 +     */
 +    protected static StreamingASTokenizer createForInlineScriptScopeBuilding(
 +            final Reader reader,
 +            final String path,
 +            final IncludeHandler includeHandler,
 +            final int offsetAdjustment,
 +            final int lineAdjustment,
 +            final int columnAdjustment)
 +    {
 +        assert reader != null : "Reader can't be null";
 +        assert path != null : "Path can't be null";
 +        assert includeHandler != null : "IncludeHandler can't be null";
 +
 +        final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
 +        tokenizer.setReader(reader);
 +        tokenizer.setPath(path);
 +        tokenizer.includeHandler = includeHandler;
 +        tokenizer.setSourcePositionAdjustment(
 +                offsetAdjustment, lineAdjustment, columnAdjustment);
 +        return tokenizer;
 +    }
 +
 +    /**
 +     * Create a tokenizer to parse an expression.
 +     */
 +    protected static StreamingASTokenizer createForInlineExpressionParsing(
 +            final Reader reader,
 +            final String path
 +            )
 +    {
 +        assert reader != null : "Reader can't be null";
 +        assert path != null : "Path can't be null";
 +
 +        final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
 +        tokenizer.setReader(reader);
 +        tokenizer.setPath(path);
 +        tokenizer.includeHandler.enterFile(path);
 +
 +        // Have to do this to get the tokenizer to work right - some things, like function expressions,
 +        // won't tokenize correctly unless the last token is '=' or another special token.
 +        tokenizer.lastToken = new ASToken(ASTokenTypes.TOKEN_OPERATOR_ASSIGNMENT, -1, -1, -1, -1, "=");
 +
 +        return tokenizer;
 +    }
 +
 +    /**
 +     * This method can create a {@code StreamingASTokenizer} with optional
 +     * "follow includes". If {@code IncludeHandler} is not null, it will follow
 +     * {@code include} directives.
 +     * 
 +     * @param reader Input to the tokenizer.
 +     * @param path File path of the input.
 +     * @param includeHandler If not null, the created tokenizer will follow
 +     * {@code include} directives.
 +     * @return A {@code StreamingASTokenizer}.
 +     */
 +    public static StreamingASTokenizer createForRepairingASTokenizer(
 +            final Reader reader,
 +            final String path,
 +            final IncludeHandler includeHandler)
 +    {
 +        assert path != null || includeHandler == null : "We need a source path to follow includes";
 +        final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
 +        tokenizer.setReader(reader);
 +        tokenizer.setPath(path);
 +        if (includeHandler != null)
 +        {
 +            tokenizer.includeHandler = includeHandler;
 +            includeHandler.enterFile(path);
 +        }
 +        return tokenizer;
 +    }
 +
 +    /**
 +     * Sets the {@link Reader} that supplies the content to this tokenizer. It
 +     * is up to the client to close any previous readers that have been in use,
 +     * and to close this reader once it has been used.
 +     * 
 +     * @param reader a {@link Reader}
 +     */
 +    public void setReader(final Reader reader)
 +    {
 +        setReader(reader, 0, 0, 0);
 +    }
 +
 +    /**
 +     * Sets the {@link Reader} that supplies the content to this tokenizer. It
 +     * is up to the client to close any previous readers that have been in use,
 +     * and to close this reader once it has been used.
 +     * 
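 +     * For example (illustrative values), for a script fragment extracted from
 +     * an MXML document starting at offset 120, line 4, column 2, call
 +     * {@code setReader(fragmentReader, 120, 4, 2)}.
 +     * 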
 +     * @param reader a {@link Reader}
 +     * @param offset Offset adjustment. If the specified reader is reading from
 +     * a string extracted from a source file, this should be the offset of the
 +     * first character read from the reader in the source file.
 +     * @param line Line adjustment.
 +     * @param column Column adjustment
 +     */
 +    public void setReader(final Reader reader, int offset, int line, int column)
 +    {
 +        this.reader = reader;
 +        tokenizer = new RawASTokenizer();
 +        tokenizer.yyreset(reader);
 +        tokenizer.setCollectComments(config.collectComments);
 +        setSourcePositionAdjustment(offset, line, column);
 +    }
 +
 +    /**
 +     * Sets the path to the file this tokenizer is scanning
 +     * 
 +     * @param path a file path
 +     */
 +    @Override
 +    public void setPath(String path)
 +    {
 +        assert path != null : "path of tokenizer shouldn't be null";
 +        sourcePath = path;
 +        tokenizer.setSourcePath(path);
 +    }
 +
 +    /**
 +     * Allows for the adjustment of offset, line and column information when
 +     * parsing subsequences of text. This should be called before tokenization
 +     * has started.
 +     * 
 +     * @param offset The offset where the fragment starts.
 +     * @param line The line where the fragment starts. This should be a
 +     * ZERO-based line number
 +     * @param column The column where the fragment starts. This should be a
 +     * ZERO-based column number
 +     */
 +    public void setSourcePositionAdjustment(int offset, int line, int column)
 +    {
 +        offsetAdjustment = offset;
 +        lineAdjustment = line;
 +        columnAdjustment = column;
 +    }
 +
 +    /**
 +     * Sets whether comments are collected: single-line and multi-line.
 +     * Default is <code>false</code>
 +     * 
 +     * @param collect true if we should collect comments
 +     */
 +    @Override
 +    public void setCollectComments(final boolean collect)
 +    {
 +        config.collectComments = collect;
 +        
 +        if (tokenizer != null)
 +            tokenizer.setCollectComments(collect);
 +    }
 +
 +    /**
 +     * Sets whether we follow include statements, including their tokens.
 +     * Default is <code>true</code>
 +     * 
 +     * @param followIncludes true if we should follow includes
 +     */
 +    @Override
 +    public void setFollowIncludes(final boolean followIncludes)
 +    {
 +        config.followIncludes = followIncludes;
 +    }
 +
 +    /**
 +     * Closes the underlying reader
 +     */
 +    @Override
 +    public void close() throws IOException
 +    {
 +        if (tokenizer != null)
 +        {
 +            tokenizer.reset();
 +            tokenizer.yyclose(); //close the reader
 +        }
 +    }
 +
 +    /**
 +     * Sets whether we ignore keywords while scanning. Default is
 +     * <code>false</code>
 +     * 
 +     * @param ignore true if we should ignore keywords
 +     */
 +    public void setIgnoreKeywords(final boolean ignore)
 +    {
 +        config.ignoreKeywords = ignore;
 +    }
 +
 +    /**
 +     * Sets whether we are scanning a full file, or a fragment. Default is
 +     * <code>true</code>
 +     * 
 +     * @param full true if we are scanning a full file.
 +     */
 +    public void setScanningFullContent(final boolean full)
 +    {
 +        config.completeContent = full;
 +    }
 +
 +    /**
 +     * Sets whether we will find metadata constructs. Default is
 +     * <code>true</code>
 +     * 
 +     * @param aware true if we will find metadata
 +     */
 +    public void setIsMetadataAware(final boolean aware)
 +    {
 +        config.findMetadata = aware;
 +    }
 +
 +    /**
 +     * Sets the {@link ITokenStreamFilter} used to filter out unwanted tokens
 +     * 
 +     * @param filter the token filter to alter the stream returned from the
 +     * tokenizer
 +     */
 +    public void setTokenFilter(ITokenStreamFilter filter)
 +    {
 +        config.filter = filter;
 +    }
 +
 +    /**
 +     * Sets the include handler used by this tokenizer to get
 +     * {@link IFileSpecification} for included files.
 +     * 
 +     * @param handler {@link IncludeHandler} this tokenizer should use.
 +     */
 +    public void setIncludeHandler(IncludeHandler handler)
 +    {
 +        includeHandler = handler;
 +    }
 +
 +    /**
 +     * Indicates whether we have tokenization problems. Can be called once
 +     * scanning has begun.
 +     * 
 +     * @return true if problems have been encountered
 +     */
 +    public boolean hasTokenizationProblems()
 +    {
 +        return tokenizer.hasProblems() || problems.size() > 0;
 +    }
 +
 +    /**
 +     * Indicates whether this tokenizer has encountered include statements,
 +     * regardless of whether it is set to follow them or not.
 +     * 
 +     * @return true if we have encountered includes
 +     */
 +    public boolean hasEncounteredIncludeStatements()
 +    {
 +        return hasEncounteredIncludeStatements;
 +    }
 +
 +    /**
 +     * Returns a collection of problems that have been encountered while
 +     * scanning.
 +     * 
 +     * @return a list of problems, never null
 +     */
 +    public List<ICompilerProblem> getTokenizationProblems()
 +    {
 +        ArrayList<ICompilerProblem> problems = new ArrayList<ICompilerProblem>(this.problems);
 +        problems.addAll(tokenizer.getProblems());
 +        return problems;
 +    }
 +
 +    public ASToken[] getTokens(final Reader reader, ITokenStreamFilter filter)
 +    {
 +        setReader(reader);
 +        List<ASToken> tokenList = initializeTokenList(reader);
 +        ASToken token = null;
 +        do
 +        {
 +            token = next();
 +            if (token != null && filter.accept(token))
 +                tokenList.add(token.clone()); //make a copy because of object pool
 +        }
 +        while (token != null);
 +        return tokenList.toArray(new ASToken[0]);
 +    }
 +
 +    @Override
 +    public ASToken[] getTokens(final Reader reader)
 +    {
 +        if (config.filter != null)
 +            return getTokens(reader, config.filter);
 +        setReader(reader);
 +        List<ASToken> tokenList = initializeTokenList(reader);
 +        ASToken token = null;
 +        do
 +        {
 +            token = next();
 +            if (token != null)
 +                tokenList.add(token.clone()); //copy ctor because of object pool
 +        }
 +        while (token != null);
 +        return tokenList.toArray(new ASToken[0]);
 +    }
 +
 +    /**
 +     * Creates the token list, pre-sized from the input length: roughly one
 +     * token per five characters for in-memory strings, and one per nine
 +     * characters for files. For example, a 45,000-character file yields an
 +     * initial capacity of 45,000 / 9 = 5,000.
 +     * 
 +     * @param reader The reader supplying the content to be tokenized.
 +     * @return An empty token list with an appropriate initial capacity.
 +     */
 +    private List<ASToken> initializeTokenList(final Reader reader)
 +    {
 +        List<ASToken> tokenList;
 +        int listSize = 8012;
 +        if (reader instanceof NonLockingStringReader)
 +        {
 +            //we know the length of this string. For a string of length x, there are roughly x/5 tokens
 +            //that can be constructed from that string. Size the list appropriately.
 +            listSize = 5;
 +            if (((NonLockingStringReader)reader).getLength() > 0)
 +            {
 +                listSize = Math.max((int)((NonLockingStringReader)reader).getLength() / 5, 5);
 +            }
 +
 +        }
 +        else if (reader instanceof ILengthAwareReader && ((ILengthAwareReader)reader).getInputType() == InputType.FILE)
 +        {
 +            listSize = 9;
 +            if (((ILengthAwareReader)reader).getLength() > 0)
 +            {
 +                listSize = Math.max((int)((ILengthAwareReader)reader).getLength() / 9, 9);
 +
 +            }
 +        }
 +        tokenList = new ArrayList<ASToken>(listSize);
 +        return tokenList;
 +    }
 +
 +    @Override
 +    public IASToken[] getTokens(final String range)
 +    {
 +        return getTokens(new NonLockingStringReader(range));
 +    }
 +
 +    /**
 +     * Returns the next token that can be produced from the underlying reader
 +     * 
 +     * @param filter an {@link ITokenStreamFilter} to restrict the tokens that
 +     * are returned
 +     * @return an ASToken, or null if no more tokens can be produced
 +     */
 +    public final ASToken next(final ITokenStreamFilter filter)
 +    {
 +        ASToken retVal = null;
 +        while (true)
 +        {
 +            retVal = next();
 +            if (retVal == null || filter.accept(retVal))
 +            {
 +                break;
 +            }
 +        }
 +        return retVal;
 +    }
 +
 +    /**
 +     * Returns the next token that can be produced from the underlying reader.
 +     * <p>
 +     * If the forked "include file tokenizer" is open (not null), return the
 +     * next token from it. If the forked tokenizer reaches the end of the
 +     * included file, close (set to null) the forked tokenizer and return the
 +     * next token from the main source file.
 +     * 
 +     * @return an ASToken, or null if no more tokens can be produced
 +     */
 +    public final ASToken next()
 +    {
 +        ASToken retVal = null;
 +        // If the lexer for the included file is open, read from the included tokenizer.
 +        boolean consumeSemi = false;
 +        try
 +        {
 +            // Return tokens from the forked include tokenizer first, if open.
 +            if (forkIncludeTokenizer != null)
 +            {
 +                retVal = forkIncludeTokenizer.next();
 +
 +                // Check if the forked tokenizer reached EOF. 
 +                if (retVal == null)
 +                {
 +                    closeIncludeTokenizer();
 +                    // We should consume the next semicolon we find.
 +                    // Most include statements are terminated with a semicolon,
 +                    // and because we read the contents of the included file,
 +                    // this could cause problems with a semicolon in a place
 +                    // we don't want it.
 +                    consumeSemi = true; 
 +                }
 +                else
 +                    return retVal;
 +            }
 +            if (bufferSize > 0)
 +            {
 +                retVal = lookAheadBuffer.remove(0);
 +                bufferSize--;
 +            }
 +            else
 +            {
 +                retVal = nextTokenFromReader();
 +
 +            }
 +            if (retVal == null)
 +                return null;
 +            final int tokenType = retVal.getType();
 +
 +            switch (tokenType)
 +            {
 +                // if we're seeing "each" at this point, it's not a
 +                // syntactic keyword, since we already look ahead for "each"
 +                // when we see "for"
 +                case TOKEN_RESERVED_WORD_EACH:
 +                    treatKeywordAsIdentifier(retVal);
 +                    processUserDefinedNamespace(retVal, 0);
 +                    return retVal;
 +                case TOKEN_KEYWORD_INCLUDE:
 +                {
 +                    // "followIncludes=false" is usually used for code model
 +                    // partitioner. They want the "include" token.
 +                    if (!config.followIncludes)
 +                        return retVal;
 +
 +                    final ASToken token = LT(1);
 +
 +                    // "include" at EOF is always a keyword
 +                    if (token == null)
 +                        return retVal;
 +
 +                    if (!matches(token, TOKEN_LITERAL_STRING))
 +                    {
 +                        treatKeywordAsIdentifier(retVal); // it's an identifier
 +                        processUserDefinedNamespace(retVal, 0);
 +                    }
 +                    else
 +                    {
 +                        hasEncounteredIncludeStatements = true;
 +                        // Consume the file path after the include token.
 +                        consume(1);
 +                        final String filenameTokenText = token.getText();
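 +                        // strip the surrounding quotes, e.g. "foo.as" -> foo.as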
 +                        final String includeString = filenameTokenText.substring(1, filenameTokenText.length() - 1);
 +
 +                        if (sourcePath == null)
 +                            throw new NullPointerException("Source file is needed for resolving included file path.");
 +                        IFileSpecification includedFileSpec = null;
 +                        //respond to problems from our file handler
 +                        includedFileSpec = includeHandler.getFileSpecificationForInclude(sourcePath, includeString);
 +                        //
 +                        if (includedFileSpec == null)
 +                        {
 +                            ICompilerProblem problem = new FileNotFoundProblem(token, filenameTokenText); //the text will be the path not found
 +                            problems.add(problem);
 +                            retVal = next();
 +                            return retVal;
 +                        }
 +                        if (includeHandler.isCyclicInclude(includedFileSpec.getPath()))
 +                        {
 +                            ICompilerProblem problem = new CyclicalIncludesProblem(token);
 +                            problems.add(problem);
 +                            retVal = next();
 +                            return retVal;
 +                        }
 +                        else
 +                        {
 +                            // Fork a tokenizer for the included file
 +                            try
 +                            {
 +                                forkIncludeTokenizer = createForIncludeFile(this, includedFileSpec, includeHandler);
 +                                retVal = forkIncludeTokenizer.next();
 +                            }
 +                            catch (FileNotFoundException fnfe)
 +                            {
 +                                includeHandler.handleFileNotFound(includedFileSpec);
 +                                ICompilerProblem problem = new FileNotFoundProblem(token, includedFileSpec.getPath());
 +                                problems.add(problem);
 +                                retVal = next();
 +                                return retVal;
 +                            }
 +                        }
 +                    }
 +
 +                    // Recover from compiler problems and continue.
 +                    if (retVal == null)
 +                    {
 +                        // Included file is empty. 
 +                        closeIncludeTokenizer();
 +                        // Fall back to main source.
 +                        retVal = this.next();
 +                    }
 +                    return retVal;
 +                }
 +                case TOKEN_RESERVED_WORD_CONFIG:
 +                    if (matches(LT(1), TOKEN_RESERVED_WORD_NAMESPACE))
 +                    { //we have a "config namespace" directive
 +                        retVal.setType(TOKEN_RESERVED_WORD_CONFIG);
 +                        return retVal;
 +                    }
 +                    treatKeywordAsIdentifier(retVal); //identifier
 +                    processUserDefinedNamespace(retVal, 0);
 +                    return retVal;
 +                case HIDDEN_TOKEN_BUILTIN_NS:
 +                    if (matches(LT(1), TOKEN_OPERATOR_NS_QUALIFIER))
 +                    { //we have public:: and this structure is not an annotation but a name ref
 +                        retVal.setType(TOKEN_NAMESPACE_NAME);
 +                        return retVal;
 +                    }
 +                    retVal.setType(TOKEN_NAMESPACE_ANNOTATION);
 +                    return retVal;
 +                case TOKEN_MODIFIER_DYNAMIC:
 +                case TOKEN_MODIFIER_FINAL:
 +                case TOKEN_MODIFIER_NATIVE:
 +                case TOKEN_MODIFIER_OVERRIDE:
 +                case TOKEN_MODIFIER_STATIC:
 +                case TOKEN_MODIFIER_VIRTUAL:
 +                {
 +                    // the previous token is either a modifier or a namespace
 +                    // (if null, assume keyword); the next token comes from a
 +                    // definition, a modifier, or a namespace
 +                    final ASToken nextToken = LT(1);
 +                    if (nextToken != null)
 +                    {
 +                        switch (nextToken.getType())
 +                        {
 +                            case TOKEN_KEYWORD_CLASS:
 +                            case TOKEN_KEYWORD_FUNCTION:
 +                            case TOKEN_KEYWORD_INTERFACE:
 +                            case TOKEN_RESERVED_WORD_NAMESPACE:
 +                            case TOKEN_KEYWORD_VAR:
 +                            case TOKEN_KEYWORD_CONST:
 +                            case TOKEN_MODIFIER_DYNAMIC:
 +                            case TOKEN_MODIFIER_FINAL:
 +                            case TOKEN_MODIFIER_NATIVE:
 +                            case TOKEN_MODIFIER_OVERRIDE:
 +                            case TOKEN_MODIFIER_STATIC:
 +                            case TOKEN_MODIFIER_VIRTUAL:
 +                            case TOKEN_NAMESPACE_ANNOTATION:
 +                            case TOKEN_NAMESPACE_NAME:
 +                            case HIDDEN_TOKEN_BUILTIN_NS:
 +                                return retVal;
 +                            case TOKEN_IDENTIFIER:
 +                                if (isUserDefinedNamespace(nextToken, 1)) // we're already looking ahead one so make sure we look ahead one further
 +                                    return retVal;
 +                            default:
 +                                // Not applicable to other token types.
 +                                break;
 +                        }
 +                    }
 +                    treatKeywordAsIdentifier(retVal);
 +                    processUserDefinedNamespace(retVal, 0);
 +                    return retVal;
 +                }
 +                    //we combine +/- for numeric literals here
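 +                    //e.g. in "x = -5" the '-' merges with '5' into a single
 +                    //numeric literal "-5"; in "a - 5" they stay separate, since
 +                    //an identifier before '-' reads as a binary operator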
 +                case TOKEN_OPERATOR_MINUS:
 +                case TOKEN_OPERATOR_PLUS:
 +                {
 +                    if (lastToken == null || !lastToken.canPreceedSignedOperator())
 +                    {
 +                        final ASToken nextToken = LT(1);
 +                        if (nextToken != null)
 +                        {
 +                            switch (nextToken.getType())
 +                            {
 +                                case TOKEN_LITERAL_NUMBER:
 +                                case TOKEN_LITERAL_HEX_NUMBER:
 +                                    retVal.setEnd(nextToken.getEnd());
 +                                    final StringBuilder builder = new StringBuilder(retVal.getText());
 +                                    builder.append(nextToken.getText());
 +                                    retVal.setText(poolString(builder.toString()));
 +                                    consume(1);
 +                                    retVal.setType(nextToken.getType());
 +                                    break;
 +                                default:
 +                                    // ignore other tokens
 +                                    break;
 +                            }
 +                        }
 +                    }
 +
 +                    return retVal;
 +                }
 +                    //RECOGNIZE: for each
 +                case TOKEN_KEYWORD_FOR:
 +                {
 +                    final ASToken token = LT(1);
 +                    if (matches(token, TOKEN_RESERVED_WORD_EACH))
 +                    {
 +                        retVal.setEnd(token.getEnd());
 +                        retVal.setText(FOR_EACH);
 +                        consume(1);
 +                        return retVal;
 +                    }
 +                    if (lastToken != null)
 +                    {
 +                        int lastTokenType = lastToken.getType();
 +                        switch (lastTokenType)
 +                        {
 +                            case TOKEN_KEYWORD_VAR:
 +                            case TOKEN_KEYWORD_FUNCTION:
 +                            case TOKEN_RESERVED_WORD_GET:
 +                            case TOKEN_RESERVED_WORD_SET:
 +                            case TOKEN_OPERATOR_MEMBER_ACCESS:
 +                                retVal.setType(TOKEN_IDENTIFIER);
 +                        }
 +                    }
 +                    return retVal;
 +                }
 +                    //RECOGNIZE: default xml namespace
 +                    //default xml namespace must exist on the same line
 +                case TOKEN_KEYWORD_DEFAULT:
 +                {
 +                    final ASToken maybeNS = LT(2);
 +                    final boolean foundTokenNamespace = maybeNS != null &&
 +                                                        maybeNS.getType() == TOKEN_RESERVED_WORD_NAMESPACE;
 +                    final ASToken maybeXML = LT(1);
 +                    if (foundTokenNamespace)
 +                    {
 +                        final boolean foundTokenXML = maybeXML != null &&
 +                                                      maybeXML.getType() == TOKEN_IDENTIFIER &&
 +                                                      XML.equals(maybeXML.getText());
 +                        if (!foundTokenXML)
 +                        {
 +                            final ICompilerProblem problem =
 +                                    new ExpectXmlBeforeNamespaceProblem(maybeNS);
 +                            problems.add(problem);
 +                        }
 +
 +                        //combine all of these tokens together
 +                        retVal.setEnd(maybeNS.getEnd());
 +                        retVal.setText(DEFAULT_XML_NAMESPACE);
 +                        retVal.setType(TOKEN_DIRECTIVE_DEFAULT_XML);
 +                        consume(2);
 +                    }
 +                    // if this isn't "default xml namespace" then
 +                    // see if it is the default case in a switch
 +                    // otherwise, assume it is an identifierName
 +                    else if (maybeXML != null && 
 +                            maybeXML.getType() != TOKEN_COLON)
 +                        retVal.setType(TOKEN_IDENTIFIER);
++                    else if (lastToken != null)
++                    {
++                        int lastTokenType = lastToken.getType();
++                        switch (lastTokenType)
++                        {
++                            case TOKEN_KEYWORD_VAR:
++                            case TOKEN_KEYWORD_FUNCTION:
++                            case TOKEN_RESERVED_WORD_GET:
++                            case TOKEN_RESERVED_WORD_SET:
++                            case TOKEN_OPERATOR_MEMBER_ACCESS:
++                                retVal.setType(TOKEN_IDENTIFIER);
++                        }
++                    }
 +                    return retVal;
 +                }
 +                case TOKEN_KEYWORD_VOID:
 +                {
 +                    //check for void 0
 +                    final ASToken token = LT(1);
 +                    if (matches(token, TOKEN_LITERAL_NUMBER) && ZERO.equals(token.getText()))
 +                    {
 +                        retVal.setType(TOKEN_VOID_0);
 +                        combineText(retVal, token);
 +                        consume(1);
 +                    }
 +                    //check for void(0)
 +                    else if (matches(token, TOKEN_PAREN_OPEN))
 +                    {
 +                        final ASToken zeroT = LT(2);
 +                        if (matches(zeroT, TOKEN_LITERAL_NUMBER) && ZERO.equals(zeroT.getText()))
 +                        {
 +                            final ASToken closeParenT = LT(3);
 +                            if (matches(closeParenT, TOKEN_PAREN_CLOSE))
 +                            {
 +                                combineText(retVal, token);
 +                                combineText(retVal, zeroT);
 +                                combineText(retVal, closeParenT);
 +                                retVal.setType(TOKEN_VOID_0);
 +                                consume(3);
 +                            }
 +                        }
 +                    }
 +                    return retVal;
 +                }
 +                case TOKEN_IDENTIFIER:
 +                {
 +                    //check for user-defined namespace before we return anything
 +                    processUserDefinedNamespace(retVal, 0);
 +                    return retVal;
 +                }
 +                    //this is for metadata processing
 +                case TOKEN_SQUARE_OPEN:
 +                {
 +                    retVal = tryParseMetadata(retVal);
 +                    return retVal;
 +                }
 +                case HIDDEN_TOKEN_STAR_ASSIGNMENT:
 +                {
 +                    //this is to solve an ambiguous case, where we can't tell the difference between 
 +                    //var foo:*=null and foo *= null;
 +                    retVal.setType(TOKEN_OPERATOR_STAR);
 +                    retVal.setEnd(retVal.getEnd() - 1);
 +                    retVal.setText("*");
 +                    //add the equals
 +                    final ASToken nextToken = tokenizer.buildToken(TOKEN_OPERATOR_ASSIGNMENT,
 +                                retVal.getEnd() + 1, retVal.getEnd() + 2,
 +                                retVal.getLine(), retVal.getColumn(), "=");
 +                    nextToken.setSourcePath(sourcePath);
 +                    addTokenToBuffer(nextToken);
 +                    return retVal;
 +                }
 +                case TOKEN_SEMICOLON:
 +                    if (consumeSemi)
 +                    {
 +                        return next();
 +                    }
 +                    return retVal;
 +                case TOKEN_VOID_0:
 +                case TOKEN_LITERAL_REGEXP:
 +                case TOKEN_COMMA:
 +                case TOKEN_COLON:
 +                case TOKEN_PAREN_OPEN:
 +                case TOKEN_PAREN_CLOSE:
 +                case TOKEN_SQUARE_CLOSE:
 +                case TOKEN_ELLIPSIS:
 +                case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
 +                case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
 +                case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
 +                case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
 +                case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
 +                case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
 +                case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
 +                case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
 +                case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
 +                case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
 +                case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
 +                case TOKEN_OPERATOR_STAR:
 +                case TOKEN_OPERATOR_NS_QUALIFIER:
 +                case TOKEN_ASDOC_COMMENT:
 +                case TOKEN_OPERATOR_DIVISION:
 +                case TOKEN_OPERATOR_MODULO:
 +                case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
 +                case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
 +                case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
 +                case TOKEN_OPERATOR_LESS_THAN:
 +                case TOKEN_OPERATOR_GREATER_THAN:
 +                case TOKEN_OPERATOR_LESS_THAN_EQUALS:
 +                case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
 +                case TOKEN_OPERATOR_EQUAL:
 +                case TOKEN_OPERATOR_NOT_EQUAL:
 +                case TOKEN_OPERATOR_STRICT_EQUAL:
 +                case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
 +                case TOKEN_OPERATOR_BITWISE_AND:
 +                case TOKEN_OPERATOR_BITWISE_XOR:
 +                case TOKEN_OPERATOR_BITWISE_OR:
 +                case TOKEN_OPERATOR_LOGICAL_AND:
 +                case TOKEN_OPERATOR_LOGICAL_OR:
 +                case TOKEN_OPERATOR_LOGICAL_AND_ASSIGNMENT:
 +                case TOKEN_OPERATOR_LOGICAL_OR_ASSIGNMENT:
 +                case TOKEN_TYPED_COLLECTION_OPEN:
 +                case TOKEN_TYPED_COLLECTION_CLOSE:
 +                case TOKEN_OPERATOR_MEMBER_ACCESS:
 +                case TOKEN_RESERVED_WORD_NAMESPACE:
 +                case TOKEN_RESERVED_WORD_GET:
 +                case TOKEN_RESERVED_WORD_SET:
 +                case TOKEN_OPERATOR_ASSIGNMENT:
 +                case TOKEN_TYPED_LITERAL_CLOSE:
 +                case TOKEN_TYPED_LITERAL_OPEN:
 +                case TOKEN_OPERATOR_TERNARY:
 +                case TOKEN_OPERATOR_DECREMENT:
 +                case TOKEN_OPERATOR_INCREMENT:
 +                case TOKEN_OPERATOR_ATSIGN:
 +                case TOKEN_OPERATOR_BITWISE_NOT:
 +                case TOKEN_OPERATOR_LOGICAL_NOT:
 +                case TOKEN_E4X_BINDING_CLOSE:
 +                case TOKEN_E4X_BINDING_OPEN:
 +                case TOKEN_OPERATOR_DESCENDANT_ACCESS:
 +                case TOKEN_NAMESPACE_ANNOTATION:
 +                case TOKEN_NAMESPACE_NAME:
 +                case TOKEN_BLOCK_OPEN:
 +                case TOKEN_BLOCK_CLOSE:
 +                case TOKEN_KEYWORD_FUNCTION:
 +                    return retVal;
 +                case HIDDEN_TOKEN_MULTI_LINE_COMMENT:
 +                case HIDDEN_TOKEN_SINGLE_LINE_COMMENT:
 +                    if (tokenizer.isCollectingComments())
 +                    {
 +                        return retVal;
 +                    }
 +                    assert (false);
 +                    return null;
 +                case TOKEN_KEYWORD_INSTANCEOF:
 +                case TOKEN_KEYWORD_AS:
 +                case TOKEN_KEYWORD_IN:
 +                case TOKEN_KEYWORD_IS:
 +                    if (lastToken != null)
 +                    {
 +                        int lastTokenType = lastToken.getType();
 +                        switch (lastTokenType)
 +                        {
 +                            case TOKEN_SEMICOLON:
 +                            case TOKEN_BLOCK_OPEN:
 +                            case TOKEN_COMMA:
 +                                retVal.setType(TOKEN_IDENTIFIER);
 +                                return retVal;
 +                        }
 +                    }
 +                    else 
 +                    {
 +                        // we are first token so assume identifier
 +                        retVal.setType(TOKEN_IDENTIFIER);
 +                        return retVal;
 +                    }
 +                    // and fall through
 +                case TOKEN_KEYWORD_DELETE:
 +                    ASToken nextToken = LT(1);
 +                    if (nextToken != null)
 +                    {
 +                        int nextTokenType = nextToken.getType();
 +                        switch (nextTokenType)
 +                        {
 +                            // if followed by an identifier, assume it is the
 +                            // keyword and not an identifierName;
 +                            case TOKEN_IDENTIFIER:
 +                                return retVal;
 +                            // followed by a comma or semicolon
 +                            // probably being used in an expression
 +                            case TOKEN_COMMA:
 +                            case TOKEN_SEMICOLON:
 +                                retVal.setType(TOKEN_IDENTIFIER);
 +                                return retVal;
 +                        }
 +                    }
 +                    // and fall through
 +                case TOKEN_KEYWORD_BREAK:
 +                case TOKEN_KEYWORD_CASE:
 +                case TOKEN_KEYWORD_CATCH:
 +                case TOKEN_KEYWORD_CLASS:
 +                case TOKEN_KEYWORD_CONST:
 +                case TOKEN_KEYWORD_CONTINUE:
 +                case TOKEN_KEYWORD_DO:
 +                case TOKEN_KEYWORD_ELSE:
 +                case TOKEN_KEYWORD_FALSE:
 +                case TOKEN_KEYWORD_FINALLY:
 +                case TOKEN_KEYWORD_IF:
 +                case TOKEN_KEYWORD_IMPORT:
 +                case TOKEN_KEYWORD_INTERFACE:
 +                case TOKEN_KEYWORD_NULL:
 +                case TOKEN_KEYWORD_PACKAGE:
 +                case TOKEN_KEYWORD_SUPER:
 +                case TOKEN_KEYWORD_SWITCH:
 +                case TOKEN_KEYWORD_THIS:
 +                case TOKEN_KEYWORD_TRUE:
 +                case TOKEN_KEYWORD_TRY:
 +                case TOKEN_KEYWORD_TYPEOF:
 +                case TOKEN_KEYWORD_USE:
 +                case TOKEN_KEYWORD_VAR:
 +                case TOKEN_KEYWORD_WHILE:
 +                case TOKEN_KEYWORD_WITH:
 +                case TOKEN_KEYWORD_RETURN:
 +                case TOKEN_KEYWORD_THROW:
 +                case TOKEN_KEYWORD_NEW:
 +                    if (lastToken != null)
 +                    {
 +                        int lastTokenType = lastToken.getType();
 +                        switch (lastTokenType)
 +                        {
 +                            case TOKEN_KEYWORD_VAR:
 +                            case TOKEN_KEYWORD_FUNCTION:
 +                            case TOKEN_RESERVED_WORD_GET:
 +                            case TOKEN_RESERVED_WORD_SET:
 +                            case TOKEN_OPERATOR_MEMBER_ACCESS:
 +                                retVal.setType(TOKEN_IDENTIFIER);
 +                        }
 +                    }
 +                    return retVal;
 +                default:
 +                    if (ASToken.isE4X(tokenType))
 +                        return retVal;
 +
 +                    if (retVal.isKeywordOrContextualReservedWord() || retVal.isLiteral())
 +                        return retVal;
 +
 +                    // If we reach here, the token fails to match any processing logic.
 +                    final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(
 +                            retVal,
 +                            ASTokenKind.UNKNOWN);
 +                    problems.add(problem);
 +            }
 +        }
 +        catch (final Exception e)
 +        {
 +            if (lastException != null)
 +            {
 +                if (lastException.getClass().isInstance(e))
 +                {
 +                    ICompilerProblem problem = new InternalCompilerProblem2(sourcePath, e, "StreamingASTokenizer");
 +                    problems.add(problem);
 +                    return null;
 +                }
 +            }
 +            else
 +            {
 +                lastException = e;
 +                retVal = null;
 +                return next();
 +            }
 +        }
 +        finally
 +        {
 +            consumeSemi = false;
 +            lastToken = retVal;
 +        }
 +        return null;
 +    }
 +
 +    /**
 +     * Error recovery: convert the given keyword token into an identifier token,
 +     * and log a syntax error.
 +     * 
 +     * @param token Keyword token.
 +     */
 +    private void treatKeywordAsIdentifier(final ASToken token)
 +    {
 +        assert token != null : "token can't be null";
 +        assert token.isKeywordOrContextualReservedWord() : "only transfer reserved words";
 +
 +        if (token.isKeyword())
 +        {
 +            final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(token, ASTokenKind.IDENTIFIER);
 +            problems.add(problem);
 +        }
 +        token.setType(TOKEN_IDENTIFIER);
 +    }
 +
 +    /**
 +     * Decide within the current context whether the following content can be
 +     * parsed as a metadata tag token.
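 +     * <p>
 +     * For example, an "[" at the start of a file, or one following a ";",
 +     * a "{", an ASDoc comment or another metadata tag, opens a metadata
 +     * tag such as {@code [Bindable]}; an "[" following an identifier or a
 +     * "]", as in {@code a[0]}, is an array access.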
 +     * 
 +     * @param nextToken The next token coming from
 +     * {@link #nextTokenFromReader()}.
 +     * @return If the following content can be a metadata tag, the result is a
 +     * token of type {@link ASTokenTypes#TOKEN_ATTRIBUTE}. Otherwise, the
 +     * argument {@code nextToken} is returned.
 +     * @throws Exception Parsing error.
 +     */
 +    private ASToken tryParseMetadata(ASToken nextToken) throws Exception
 +    {
 +        // Do not initialize this variable so that Java flow-analysis can check if
 +        // the following rules cover all the possibilities.
 +        final boolean isNextMetadata;
 +
 +        if (!config.findMetadata)
 +        {
 +            // The lexer is configured to not recognize metadata.
 +            isNextMetadata = false;
 +        }
 +        else if (lastToken == null)
 +        {
 +            // An "[" at the beginning of a script is always part of a metadata tag.
 +            isNextMetadata = true;
 +        }
 +        else
 +        {
 +            switch (lastToken.getType())
 +            {
 +                case TOKEN_ASDOC_COMMENT:
 +                case TOKEN_SEMICOLON:
 +                case TOKEN_ATTRIBUTE:
 +                case TOKEN_BLOCK_OPEN:
 +                    // An "[" after these tokens is always part of a metadata token.
 +                    isNextMetadata = true;
 +                    break;
 +
 +                case TOKEN_SQUARE_CLOSE:
 +                case TOKEN_IDENTIFIER:
 +                    // "[" following a "]" is an array access.
 +                    // "[" following an identifier is an array access.
 +                    isNextMetadata = false;
 +                    break;
 +                    
 +                case TOKEN_KEYWORD_INCLUDE:
 +                case TOKEN_BLOCK_CLOSE:
 +                case TOKEN_OPERATOR_STAR:
 +                    // An "[" after these tokens is part of a metadata token
 +                    // only if the "[" is on a new line.
 +                    isNextMetadata = !lastToken.matchesLine(nextToken);
 +                    break;
 +
 +                default:
 +                    // If we are lexing an entire file, then at this point
 +                    // we "know" that the next token is not metadata.
 +                    if (config.completeContent)
 +                    {
 +                        isNextMetadata = false;
 +                    }
 +                    else
 +                    {
 +                        // In "fragment" mode, which is used by the syntax
 +                        // coloring code in Builder, we assume the following
 +                        // tokens cannot precede metadata because they all
 +                        // start or occur in expressions.
 +                        switch (lastToken.getType())
 +                        {
 +                            case TOKEN_OPERATOR_EQUAL:
 +                            case TOKEN_OPERATOR_TERNARY:
 +                            case TOKEN_COLON:
 +                            case TOKEN_OPERATOR_PLUS:
 +                            case TOKEN_OPERATOR_MINUS:
 +                            case TOKEN_OPERATOR_STAR:
 +                            case TOKEN_OPERATOR_DIVISION:
 +                            case TOKEN_OPERATOR_MODULO:
 +                            case TOKEN_OPERATOR_BITWISE_AND:
 +                            case TOKEN_OPERATOR_BITWISE_OR:
 +                            case TOKEN_KEYWORD_AS:
 +                            case TOKEN_OPERATOR_BITWISE_XOR:
 +                            case TOKEN_OPERATOR_LOGICAL_AND:
 +                            case TOKEN_OPERATOR_LOGICAL_OR:
 +                            case TOKEN_PAREN_OPEN:
 +                            case TOKEN_COMMA:
 +                            case TOKEN_OPERATOR_BITWISE_NOT:
 +                            case TOKEN_OPERATOR_LOGICAL_NOT:
 +                            case TOKEN_OPERATOR_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
 +                            case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
 +                            case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
 +                            case TOKEN_OPERATOR_LESS_THAN:
 +                            case TOKEN_OPERATOR_GREATER_THAN:
 +                            case TOKEN_OPERATOR_LESS_THAN_EQUALS:
 +                            case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
 +                            case TOKEN_OPERATOR_NOT_EQUAL:
 +                            case TOKEN_OPERATOR_STRICT_EQUAL:
 +                            case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
 +                            case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
 +                            case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
 +                                isNextMetadata = false;
 +                                break;
 +                            default:
 +                                isNextMetadata = true;
 +                                break;
 +                        }
 +                    }
 +                    break;
 +            }
 +        }
 +
 +        final ASToken result;
 +        if (isNextMetadata)
 +            result = consumeMetadata(nextToken);
 +        else
 +            result = nextToken;
 +
 +        return result;
 +    }
 +
 +    /**
 +     * Close the forked include file tokenizer, and set it to null.
 +     */
 +    private void closeIncludeTokenizer()
 +    {
 +        if (forkIncludeTokenizer == null)
 +            return;
 +
 +        try
 +        {
 +            problems.addAll(forkIncludeTokenizer.problems);
 +            forkIncludeTokenizer.close();
 +        }
 +        catch (IOException e)
 +        {
 +            throw new RuntimeException(e);
 +        }
 +        includeHandler.leaveFile(forkIncludeTokenizer.getEndOffset());
 +        forkIncludeTokenizer = null;
 +    }
 +
 +    /**
 +     * Consume the tokens of a metadata tag, starting from the given "["
 +     * token. If the content turns out not to be valid metadata, the
 +     * consumed tokens are pushed back into the lookahead buffer.
 +     * 
 +     * @param startToken The "[" token that may open a metadata tag.
 +     * @return A {@link MetaDataPayloadToken} if metadata was recognized;
 +     * otherwise a copy of the original {@code startToken}.
 +     * @throws Exception Lexing error.
 +     */
 +    private final ASToken consumeMetadata(final ASToken startToken) throws Exception
 +    {
 +        final ASToken originalToken = new ASToken(startToken);
 +        MetaDataPayloadToken payload = new MetaDataPayloadToken(originalToken);
 +        final ArrayList<ASToken> safetyNet = new ArrayList<ASToken>(5);
 +        boolean isMetadata = true;
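 +        // Tokens are copied into the safety net as they are consumed so a
 +        // wrong guess can be undone. E.g. (assuming "+" cannot exist in
 +        // metadata) "[a + b]" stops at the "+" and the buffered tokens are
 +        // pushed back into the lookahead buffer.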
 +        while (true)
 +        {
 +            tokenizer.setReuseLastToken();
 +            final ASToken next = LT(1);
 +            if (next == null)
 +            {
 +                break;
 +            }
 +            safetyNet.add(new ASToken(next)); //sadly, we have to deal with the extra object creation if we're wrong
 +            payload.addToken(next); //here too
 +
 +            if (!next.canExistInMetadata())
 +            {
 +                isMetadata = false;
 +                //consume the last token we saw so that we don't get ourselves into an infinite loop
 +                //it was the last token of the metadata, and this makes "next" the current token.
 +                consume(1);
 +                break;
 +            }
 +            consume(1);
 +            if (next.getType() == TOKEN_SQUARE_CLOSE)
 +            {
 +                break;
 +            }
 +        }
 +        if (!isMetadata)
 +        { //we're wrong, so let's add back the tokens to our lookahead buffer
 +            lookAheadBuffer.addAll(safetyNet);
 +            bufferSize = lookAheadBuffer.size();
 +            return originalToken;
 +        }
 +        return payload;
 +
 +    }
 +
 +    private final void fill(final int distance) throws Exception
 +    {
 +        int pos = 0;
 +        while (pos < distance)
 +        {
 +            addTokenToBuffer(nextTokenFromReader());
 +            pos++;
 +        }
 +    }
 +
 +    /**
 +     * Append a token to the lookahead buffer and lock it.
 +     * 
 +     * @param nextToken Token to add; may be null at EOF.
 +     */
 +    private final void addTokenToBuffer(final ASToken nextToken)
 +    {
 +        bufferSize++;
 +        lookAheadBuffer.add(nextToken);
 +        // at EOF, nextToken can be null.
 +        if (nextToken != null)
 +            nextToken.lock();
 +    }
 +
 +    /**
 +     * Get the pooled version of a given string.
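 +     * Identical token texts (for example an identifier repeated throughout
 +     * a file) then share a single {@code String} instance, reducing memory
 +     * use.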
 +     * 
 +     * @param text String literal.
 +     * @return Pooled string.
 +     */
 +    private final String poolString(final String text)
 +    {
 +        String pooledString = stringPool.get(text);
 +        if (pooledString == null)
 +        {
 +            stringPool.put(text, text);
 +            pooledString = text;
 +        }
 +        return pooledString;
 +    }
 +
 +    /**
 +     * Get the next token from the source input. If this tokenizer is created
 +     * for a source file by {@link ASC}, and there are files included via
 +     * the {@code -in} option, the tokenizer will return the
 +     * "injected include tokens" before real tokens coming from the JFlex
 +     * generated tokenizer.
 +     * 
 +     * @return next token from the source input
 +     * @throws IOException error
 +     * @see ASCompilationUnit#createMainCompilationUnitForASC()
 +     */
 +    private final ASToken nextTokenFromReader() throws IOException
 +    {
 +        final ASToken nextToken;
 +        if (ascIncludeImaginaryTokens != null && ascIncludeImaginaryTokens.hasNext())
 +            nextToken = ascIncludeImaginaryTokens.next();
 +        else if (tokenizer.hasBufferToken())
 +            nextToken = tokenizer.getBufferToken();
 +        else
 +            nextToken = tokenizer.nextToken();
 +
 +        if (nextToken != null)
 +        {
 +            // Converting unicode on-the-fly in the lexer is much slower than
 +            // converting it here after the token is made, especially for 
 +            // identifiers.
 +            switch (nextToken.getType())
 +            {
 +                case TOKEN_LITERAL_NUMBER:
 +                    nextToken.setText(poolString(nextToken.getText()));
 +                    break;
 +                case TOKEN_LITERAL_REGEXP:
 +                    // Any "backslash-u" entities left after "convertUnicode"
 +                    // are invalid unicode escape sequences. According to AS3
 +                    // behavior, the backslash character is dropped.
 +                    nextToken.setText(poolString(convertUnicode(nextToken.getText()).replaceAll("\\\\u", "u")));
 +                    break;
 +                case TOKEN_IDENTIFIER:
 +                    // Intern 'identifiers' and 'keywords'.
 +                    // 'keywords' are 'identifiers' until they are analyzed.
 +                    final String originalIdentifierName = nextToken.getText();
 +                    final String normalizedIdentifierName = poolString(convertUnicode(originalIdentifierName));
 +                    nextToken.setText(normalizedIdentifierName);
 +                    if (!config.ignoreKeywords)
 +                    {
 +                        /**
 +                         * If the identifier has escaped unicode sequence, it
 +                         * can't be a keyword.
 +                         * <p>
 +                         * According to ASL syntax spec chapter 3.4:
 +                         * <blockquote> Unicode escape sequences may be used to
 +                         * spell the names of identifiers that would otherwise
 +                         * be keywords. This is in contrast to ECMAScript.
 +                         * </blockquote>
 +                         */
 +                        if (originalIdentifierName.equals(normalizedIdentifierName))
 +                        {
 +                            // do keyword analysis here
 +                            final Integer info = keywordToTokenMap.get(nextToken.getText());
 +                            if (info != null)
 +                                nextToken.setType(info);
 +                        }
 +                    }
 +                    break;
 +                default:
 +                    // Ignore other tokens.
 +                    break;
 +            }
 +
 +            // We want to adjust all of our offsets here, BUT the column
 +            // adjustment is only valid for the first line, which is line 0;
 +            // if we're not on the first line, don't adjust the column.
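 +            // E.g. with a lineAdjustment of 10 and a columnAdjustment of 4,
 +            // a token at (line 0, col 2) moves to (line 10, col 6), while a
 +            // token at (line 1, col 2) moves to (line 11, col 2).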
 +            nextToken.adjustLocation(
 +                    offsetAdjustment,
 +                    lineAdjustment,
 +                    nextToken.getLine() == 0 ? columnAdjustment : 0);
 +            nextToken.storeLocalOffset();
 +
 +            if (includeHandler != null)
 +            {
 +                nextToken.setSourcePath(includeHandler.getIncludeStackTop());
 +                includeHandler.onNextToken(nextToken);
 +            }
 +
 +            if (nextToken.getSourcePath() == null)
 +                nextToken.setSourcePath(sourcePath);
 +
 +            if (reader instanceof SourceFragmentsReader)
 +                ((SourceFragmentsReader)reader).adjustLocation(nextToken);
 +        }
 +        return nextToken;
 +    }
 +
 +    /**
 +     * Consume tokens in the buffer
 +     * 
 +     * @param distance the number of tokens to consume
 +     */
 +    private final void consume(int distance)
 +    {
 +        if (bufferSize >= distance)
 +        {
 +            for (; distance > 0; distance--)
 +            {
 +                lookAheadBuffer.remove(bufferSize - 1);
 +                bufferSize--;
 +            }
 +        }
 +    }
 +
 +    /**
 +     * Returns the token {@code distance} tokens ahead of the current
 +     * position, filling the lookahead buffer from the underlying lexer as
 +     * needed; the token is not consumed.
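 +     * For example, {@code LT(1)} peeks at the very next token and
 +     * {@code LT(2)} at the one after it; neither removes tokens from the
 +     * buffer (that is what {@link #consume(int)} does).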
 +     * 
 +     * @param distance distance to look ahead
 +     * @return an {@link ASToken}
 +     * @throws Exception
 +     */
 +    private final ASToken LT(final int distance) throws Exception
 +    {
 +        if (bufferSize < distance)
 +        {
 +            fill(distance - bufferSize);
 +        }
 +        return lookAheadBuffer.get(distance - 1);
 +    }
 +
 +    private static final boolean matches(final ASToken token, final int type)
 +    {
 +        return token != null && token.getType() == type;
 +    }
 +
 +    /**
 +     * Retrieve the end offset of the file.
 +     * <p>
 +     * The result is the end offset of the file, not the offset of the last
 +     * token; this allows any trailing whitespace to be included so that the
 +     * parser can span the resulting {@code FileNode} across the entire file.
 +     * 
 +     * @return the end offset of the input file
 +     */
 +    public final int getEndOffset()
 +    {
 +        return tokenizer.getOffset() + offsetAdjustment;
 +    }
 +
 +    /**
 +     * Computes whether the following token is a user-defined namespace. This
 +     * method calls {@code processUserDefinedNamespace}, which may change
 +     * token types.
 +     * 
 +     * @param token token to start our analysis
 +     * @param lookaheadOffset offset of the tokens to look at
 +     * @return true if we're a user-defined namespace
 +     * @throws Exception
 +     */
 +    private final boolean isUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
 +    {
 +        processUserDefinedNamespace(token, lookaheadOffset);
 +        return token.getType() == TOKEN_NAMESPACE_ANNOTATION || token.getType() == TOKEN_NAMESPACE_NAME;
 +    }
 +
 +    /**
 +     * Because AS3 supports qualified/unqualified namespaces as decorators on
 +     * definitions, we need to detect them before we even make it to the parser.
 +     * These look exactly like names/qnames, and so if they're on the same line
 +     * as a definition they might be a namespace name instead of a standard
 +     * identifier. This method will detect these cases, and change token types
 +     * accordingly.
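 +     * <p>
 +     * For example, in {@code ns1 var x;} the {@code ns1} token is retyped
 +     * to a namespace annotation; in {@code ns1::foo} it becomes a
 +     * namespace name; and in {@code my.pack.ns1 function f():void} the
 +     * whole dotted chain is retyped as a qualified namespace annotation.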
 +     * 
 +     * @param token token to start our analysis
 +     * @param lookaheadOffset offset of the tokens to look at
 +     * @throws Exception
 +     */
 +    private final void processUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
 +    {
 +        token.lock();
 +
 +        //determine if we have a user-defined namespace
 +        //our first token will be an identifier, and the cases we're looking for are:
 +        //1.) user_namespace (function|var|dynamic|static|final|native|override)
 +        //2.) my.pack.user_namespace (function|var|dynamic|static|final|native|override)
 +        //option number 1 is probably the 99% case so optimize for it
 +        ASToken nextToken = LT(1 + lookaheadOffset);
 +        if (token.matchesLine(nextToken))
 +        {
 +            // If the next token is an identifier check to see if it should
 +            // be modified to a TOKEN_NAMESPACE_ANNOTATION
 +            // This is so that code like:
 +            //    ns1 ns2 var x;
 +            // gets parsed correctly (2 namespace annotations, which is an error)
 +            if (nextToken.getType() == TOKEN_IDENTIFIER)
 +                processUserDefinedNamespace(nextToken, 1 + lookaheadOffset);
 +
 +            switch (nextToken.getType())
 +            {
 +                case TOKEN_KEYWORD_FUNCTION:
 +                case TOKEN_KEYWORD_VAR:
 +                case TOKEN_KEYWORD_CONST:
 +                case TOKEN_RESERVED_WORD_NAMESPACE:
 +                case TOKEN_MODIFIER_DYNAMIC:
 +                case TOKEN_MODIFIER_FINAL:
 +                case TOKEN_MODIFIER_NATIVE:
 +                case TOKEN_MODIFIER_OVERRIDE:
 +                case TOKEN_MODIFIER_STATIC:
 +                case TOKEN_MODIFIER_VIRTUAL:
 +                case TOKEN_KEYWORD_CLASS:
 +                case TOKEN_KEYWORD_INTERFACE:
 +                case TOKEN_NAMESPACE_ANNOTATION:
 +                case HIDDEN_TOKEN_BUILTIN_NS:
 +                    token.setType(TOKEN_NAMESPACE_ANNOTATION);
 +                    return;
 +                case TOKEN_OPERATOR_NS_QUALIFIER: //simple name with a :: binding after it; has to be a namespace
 +                    token.setType(TOKEN_NAMESPACE_NAME);
 +                    return;
 +            }
 +            if (nextToken.getType() == TOKEN_OPERATOR_MEMBER_ACCESS)
 +            {
 +                int nextValidPart = TOKEN_IDENTIFIER;
 +                final ArrayList<ASToken> toTransform = new ArrayList<ASToken>(3);
 +                toTransform.add(token);
 +                toTransform.add(nextToken);
 +                int laDistance = lookaheadOffset + 1;
 +                while (true)
 +                {
 +                    nextToken = LT(++laDistance);
 +                    if (token.matchesLine(nextToken))
 +                    {
 +                        if (nextToken.getType() == nextValidPart)
 +                        {
 +                            nextValidPart = (nextToken.getType() == TOKEN_IDENTIFIER) ? TOKEN_OPERATOR_MEMBER_ACCESS : TOKEN_IDENTIFIER;
 +                            toTransform.add(nextToken);
 +                        }
 +                        else if (nextValidPart != TOKEN_IDENTIFIER && nextToken.canFollowUserNamespace())
 +                        {
 +                            // Next token is in the follow set of a namespace,
 +                            // so all the buffered tokens need to be converted
 +                            // into namespace tokens.
 +                            for (final ASToken ttToken : toTransform)
 +                            {
 +                                if (ttToken.getType() == TOKEN_IDENTIFIER)
 +                                    ttToken.setType(TOKEN_NAMESPACE_ANNOTATION);
 +                                else
 +                                    ttToken.setType(TOKEN_OPERATOR_MEMBER_ACCESS);
 +                            }
 +                            break;
 +                        }
 +                        else
 +                        {
 +                            break;
 +                        }
 +                    }
 +                    else
 +                    {
 +                        break;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /**
 +     * Combines the text of two tokens, adding whitespace between them and
 +     * adjusting offsets appropriately.
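 +     * <p>
 +     * For example, combining a target {@code "foo"} spanning offsets
 +     * [0, 3) with a source {@code "bar"} starting at offset 5 yields the
 +     * target text {@code "foo  bar"} (two padding spaces) and moves the
 +     * target's end offset to 8.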
 +     * 
 +     * @param target the base token that the source text is appended to
 +     * @param source the source of the text to add
 +     */
 +    private final void combineText(TokenBase target, TokenBase source)
 +    {
 +        StringBuilder text = new StringBuilder();
 +        text.append(target.getText());
 +        //add whitespace for gaps between tokens
 +        for (int i = 0; i < (source.getStart() - target.getEnd()); i++)
 +        {
 +            text.append(" ");
 +        }
 +        text.append(source.getText());
 +        target.setText(poolString(text.toString()));
 +        target.setEnd(target.getStart() + text.length());
 +    }
 +
 +    /**
 +     * Unicode pattern for {@code \u0000}.
 +     */
 +    private static final Pattern UNICODE_PATTERN = Pattern.compile(BaseRawASTokenizer.PATTERN_U4);
 +
 +    /**
 +     * Leading characters of a unicode pattern.
 +     */
 +    private static final String UNICODE_LEADING_CHARS = "\\u";
 +
 +    /**
 +     * Convert escaped unicode sequence in a string. For example:
 +     * {@code foo\u0051bar} is converted into {@code fooQbar}.
 +     * 
 +     * @param text input string
 +     * @return converted text
 +     */
 +    static String convertUnicode(final String text)
 +    {
 +        // Calling Pattern.matcher() is much slower than String.contains(), so
 +        // we need this predicate to skip unnecessary RegEx computation.
 +        if (text.contains(UNICODE_LEADING_CHARS))
 +        {
 +            final StringBuilder result = new StringBuilder();
 +            final Matcher matcher = UNICODE_PATTERN.matcher(text);
 +            int start = 0;
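 +            // Copy the text between matches verbatim and replace each
 +            // 4-digit escape with the character it encodes, so e.g.
 +            // "a\u0041b" becomes "aAb".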
 +            while (matcher.find())
 +            {
 +                result.append(text, start, matcher.start());
 +                result.append(Character.toChars(BaseRawASTokenizer.decodeEscapedUnicode(matcher.group())));
 +                start = matcher.end();
 +            }
 +            result.append(text, start, text.length());
 +            return result.toString();
 +        }
 +        else
 +        {
 +            return text;
 +        }
 +    }
 +
 +    /**
 +     * Gets the source path to the file being tokenized.
 +     */
 +    public String getSourcePath()
 +    {
 +        return sourcePath;
 +    }
 +}