You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flex.apache.org by cd...@apache.org on 2016/04/21 13:35:47 UTC
[17/19] git commit: [flex-falcon]
[refs/heads/feature/maven-migration-test] - - Renamed the "generate" goal to
"generate-extern" - Replicated the changes to the node extern to match the
develop branch - Fixed the replace commands in js to generate an ide
http://git-wip-us.apache.org/repos/asf/flex-falcon/blob/dd503343/compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
----------------------------------------------------------------------
diff --cc compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
index 61b6e26,0000000..1c90e94
mode 100644,000000..100644
--- a/compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
+++ b/compiler/src/main/java/org/apache/flex/compiler/internal/parsing/as/StreamingASTokenizer.java
@@@ -1,1871 -1,0 +1,1884 @@@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.flex.compiler.internal.parsing.as;
+
+import java.io.Closeable;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.flex.compiler.clients.ASC;
+import org.apache.flex.compiler.constants.IASKeywordConstants;
+import org.apache.flex.compiler.filespecs.IFileSpecification;
+import org.apache.flex.compiler.internal.parsing.ITokenStreamFilter;
+import org.apache.flex.compiler.internal.parsing.SourceFragmentsReader;
+import org.apache.flex.compiler.internal.parsing.TokenBase;
+import org.apache.flex.compiler.internal.units.ASCompilationUnit;
+import org.apache.flex.compiler.parsing.IASToken;
+import org.apache.flex.compiler.parsing.IASTokenizer;
+import org.apache.flex.compiler.parsing.IASToken.ASTokenKind;
+import org.apache.flex.compiler.problems.CyclicalIncludesProblem;
+import org.apache.flex.compiler.problems.ExpectXmlBeforeNamespaceProblem;
+import org.apache.flex.compiler.problems.FileNotFoundProblem;
+import org.apache.flex.compiler.problems.ICompilerProblem;
+import org.apache.flex.compiler.problems.InternalCompilerProblem2;
+import org.apache.flex.compiler.problems.UnexpectedTokenProblem;
+import org.apache.flex.utils.ILengthAwareReader;
+import org.apache.flex.utils.NonLockingStringReader;
+import org.apache.flex.utils.ILengthAwareReader.InputType;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+/**
+ * This Tokenizer provides tokens to be used by various clients, most notably
+ * the ASParser. Given the nature of ambiguities in the ActionScript 3 language,
+ * this tokenizer also serves to disambiguate tokens based on a combination of
+ * look behind and lookahead. For all cases of ambiguity, only one token is
+ * needed for look behind, and in our worst case, n tokens forwards where n is
+ * the number of tokens that can be produced. Some other state is kept in order
+ * to know which type of container we may exist in (function, class, interface,
+ * etc). We buffer LA token results to avoid unneeded lookahead
+ */
+public class StreamingASTokenizer implements ASTokenTypes, IASTokenizer, Closeable
+{
+ private static final String FOR_EACH = "for each";
+ private static final String XML = "xml";
+ private static final String DEFAULT_XML_NAMESPACE = "default xml namespace";
+ private static final String ZERO = "0";
+
+ /**
+ * Map from keyword text to token type.
+ * <p>
+ * We use a HashMap here to avoid slowing down the performance of the
+ * underlying lexer. We avoid the "longest match" problem, requiring a
+ * lot of rescanning on the lexer level to determine keywords from
+ * identifiers. And since hash map lookup is constant, this is (in theory)
+ * faster than doing this in the scanner since we're not bound by i/o or
+ * state machine back-tracing.
+ */
+ private static final Map<String, Integer> keywordToTokenMap = new ImmutableMap.Builder<String, Integer>()
+ .put(IASKeywordConstants.AS, TOKEN_KEYWORD_AS)
+ .put(IASKeywordConstants.IS, TOKEN_KEYWORD_IS)
+ .put(IASKeywordConstants.INSTANCEOF, TOKEN_KEYWORD_INSTANCEOF)
+ .put(IASKeywordConstants.IN, TOKEN_KEYWORD_IN)
+ .put(IASKeywordConstants.DELETE, TOKEN_KEYWORD_DELETE)
+ .put(IASKeywordConstants.TYPEOF, TOKEN_KEYWORD_TYPEOF)
+ .put(IASKeywordConstants.CONST, TOKEN_KEYWORD_CONST)
+ .put(IASKeywordConstants.GET, TOKEN_RESERVED_WORD_GET)
+ .put(IASKeywordConstants.IMPLEMENTS, TOKEN_RESERVED_WORD_IMPLEMENTS)
+ .put(IASKeywordConstants.IMPORT, TOKEN_KEYWORD_IMPORT)
+ .put(IASKeywordConstants.USE, TOKEN_KEYWORD_USE)
+ .put(IASKeywordConstants.EXTENDS, TOKEN_RESERVED_WORD_EXTENDS)
+ .put(IASKeywordConstants.NEW, TOKEN_KEYWORD_NEW)
+ .put(IASKeywordConstants.DYNAMIC, TOKEN_MODIFIER_DYNAMIC)
+ .put(IASKeywordConstants.FINAL, TOKEN_MODIFIER_FINAL)
+ .put(IASKeywordConstants.NATIVE, TOKEN_MODIFIER_NATIVE)
+ .put(IASKeywordConstants.OVERRIDE, TOKEN_MODIFIER_OVERRIDE)
+ .put(IASKeywordConstants.STATIC, TOKEN_MODIFIER_STATIC)
+ .put(IASKeywordConstants.VIRTUAL, TOKEN_MODIFIER_VIRTUAL)
+ .put(IASKeywordConstants.SET, TOKEN_RESERVED_WORD_SET)
+ // Keywords with special token types that affect subsequent blocks
+ .put(IASKeywordConstants.CATCH, TOKEN_KEYWORD_CATCH)
+ .put(IASKeywordConstants.CLASS, TOKEN_KEYWORD_CLASS)
+ .put(IASKeywordConstants.FUNCTION, TOKEN_KEYWORD_FUNCTION)
+ .put(IASKeywordConstants.INTERFACE, TOKEN_KEYWORD_INTERFACE)
+ .put(IASKeywordConstants.PACKAGE, TOKEN_KEYWORD_PACKAGE)
+ // #120009: allow "var" inside parameter list, even though it's not
+ // valid AS (don't turn the subsequent function block open into a block open)
+ .put(IASKeywordConstants.VAR, TOKEN_KEYWORD_VAR)
+ .put(IASKeywordConstants.FALSE, TOKEN_KEYWORD_FALSE)
+ .put(IASKeywordConstants.NULL, TOKEN_KEYWORD_NULL)
+ .put(IASKeywordConstants.TRUE, TOKEN_KEYWORD_TRUE)
+ .put(IASKeywordConstants.PUBLIC, HIDDEN_TOKEN_BUILTIN_NS)
+ .put(IASKeywordConstants.PRIVATE, HIDDEN_TOKEN_BUILTIN_NS)
+ .put(IASKeywordConstants.PROTECTED, HIDDEN_TOKEN_BUILTIN_NS)
+ .put(IASKeywordConstants.INTERNAL, HIDDEN_TOKEN_BUILTIN_NS)
+ .put(IASKeywordConstants.INCLUDE, TOKEN_KEYWORD_INCLUDE)
+ // Keywords for statements that affect subsequent blocks
+ .put(IASKeywordConstants.DO, TOKEN_KEYWORD_DO)
+ .put(IASKeywordConstants.WHILE, TOKEN_KEYWORD_WHILE)
+ .put(IASKeywordConstants.BREAK, TOKEN_KEYWORD_BREAK)
+ .put(IASKeywordConstants.CONTINUE, TOKEN_KEYWORD_CONTINUE)
+ .put(IASKeywordConstants.GOTO, TOKEN_RESERVED_WORD_GOTO)
+ .put(IASKeywordConstants.FOR, TOKEN_KEYWORD_FOR)
+ .put(StreamingASTokenizer.FOR_EACH, TOKEN_KEYWORD_FOR)
+ .put(IASKeywordConstants.EACH, TOKEN_RESERVED_WORD_EACH)
+ .put(IASKeywordConstants.WITH, TOKEN_KEYWORD_WITH)
+ .put(IASKeywordConstants.ELSE, TOKEN_KEYWORD_ELSE)
+ .put(IASKeywordConstants.IF, TOKEN_KEYWORD_IF)
+ .put(IASKeywordConstants.SWITCH, TOKEN_KEYWORD_SWITCH)
+ .put(IASKeywordConstants.CASE, TOKEN_KEYWORD_CASE)
+ .put(IASKeywordConstants.DEFAULT, TOKEN_KEYWORD_DEFAULT)
+ .put(IASKeywordConstants.TRY, TOKEN_KEYWORD_TRY)
+ .put(IASKeywordConstants.FINALLY, TOKEN_KEYWORD_FINALLY)
+ // Keywords with a generic keyword token type that have no effect
+ // on subsequent blocks.
+ .put(IASKeywordConstants.NAMESPACE, TOKEN_RESERVED_WORD_NAMESPACE)
+ .put(IASKeywordConstants.CONFIG, TOKEN_RESERVED_WORD_CONFIG)
+ .put(IASKeywordConstants.THROW, TOKEN_KEYWORD_THROW)
+ .put(IASKeywordConstants.SUPER, TOKEN_KEYWORD_SUPER)
+ .put(IASKeywordConstants.THIS, TOKEN_KEYWORD_THIS)
+ .put(IASKeywordConstants.VOID, TOKEN_KEYWORD_VOID)
+ .put(IASKeywordConstants.RETURN, TOKEN_KEYWORD_RETURN)
+ .build();
+
+ /**
+ * Configuration for our tokenizer
+ */
+ private static final class TokenizerConfig
+ {
+ /**
+ * Flag that lets us ignore keywords for more general string parsing
+ */
+ public boolean ignoreKeywords = false;
+
+ /**
+ * Flag that lets us be aware of metadata
+ */
+ public boolean findMetadata = true;
+
+ /**
+ * Flag indicating that we are tokenizing full content/files, and not
+ * segments
+ */
+ public boolean completeContent = true;
+
+ /**
+ * IFilter for old APIs
+ */
+ public ITokenStreamFilter filter;
+
+ /**
+ * Flag indicating we should collect comments
+ */
+ public boolean collectComments = false;
+
+ /**
+ * Flag indicating we follow include statements, including their tokens
+ */
+ public boolean followIncludes = true;
+ }
+
+ private Reader reader;
+
+ //underlying lexer
+ private RawASTokenizer tokenizer;
+
+ //last exception to prevent us from looping forever
+ private Exception lastException = null;
+
+ //LA buffer
+ private final List<ASToken> lookAheadBuffer;
+ private int bufferSize = 0; //maintain size ourselves since it's faster
+
+ //last token we encountered, used for lookback
+ private ASToken lastToken;
+
+ private int offsetAdjustment; //for offset adjustment
+ private int lineAdjustment = 0;
+ private int columnAdjustment = 0;
+
+ private IncludeHandler includeHandler;
+
+ /**
+ * The forked tokenizer for included files. If not null, {@link #next()}
+ * will return a token from this tokenizer.
+ * <p>
+ * After all the tokens are returned from the included source file,
+ * {@link #closeIncludeTokenizer()} closes the tokenizer and set this field
+ * to null.
+ */
+ private StreamingASTokenizer forkIncludeTokenizer;
+
+ /**
+ * Flag to indicate if we have followed include statements
+ */
+ private boolean hasEncounteredIncludeStatements = false;
+
+ private TokenizerConfig config;
+
+ /**
+ * Source file path. This is used when resolving the included file path.
+ * {@link #StreamingASTokenizer(IFileSpecification)} and
+ * {@link #StreamingASTokenizer(IFileSpecification, Stack)} sets the value.
+ */
+ private String sourcePath;
+
+ /**
+ * Lexer problems.
+ * */
+ private final List<ICompilerProblem> problems = new ArrayList<ICompilerProblem>();
+
+ /**
+ * Imaginary tokens generated for {@code asc -in} option.
+ */
+ private Iterator<ASToken> ascIncludeImaginaryTokens;
+
+ /**
+ * You should probably not use this constructor. There is some legacy code
+ * that uses this constructor, but that code should be updated to use one of
+ * the static create methods below.
+ * <p>
+ * TODO: make this private.
+ */
+ public StreamingASTokenizer(final Reader reader)
+ {
+ this();
+ setReader(reader);
+ }
+
+ /**
+ * A pool to reduce duplicated string literals created
+ */
+ private final HashMap<String, String> stringPool;
+
+ /**
+ * You should probably not use this constructor. There is a lot of code that
+ * uses this constructor, but that code should be updated to use one of the
+ * static create methods below.
+ * <p>
+ * TODO: make this private.
+ */
+ public StreamingASTokenizer()
+ {
+ tokenizer = new RawASTokenizer();
+ config = new TokenizerConfig();
+ lookAheadBuffer = new ArrayList<ASToken>(5);
+ includeHandler = IncludeHandler.creatDefaultIncludeHandler();
+ stringPool = new HashMap<String, String>();
+
+ // Initialize string pool with keyword strings. The keyword strings
+ // are declared as constants which are automatically "interned".
+ for (final String keyword : keywordToTokenMap.keySet())
+ {
+ stringPool.put(keyword, keyword);
+ }
+ }
+
+ /**
+ * Creates a tokenizer suitable for the mxml indexing code, covering the
+ * script fragments the new tokenizer will tokenize.
+ *
+ * @return A new tokenizer suitable for tokenizing script fragments in an
+ * mxml document that is being tokenized for the full text search index.
+ */
+ public static StreamingASTokenizer createForMXMLIndexing(String fileName)
+ {
+ StreamingASTokenizer result = new StreamingASTokenizer();
+ result.setPath(fileName);
+ result.includeHandler.enterFile(result.sourcePath);
+ return result;
+ }
+
+ /**
+ * Fork a new tokenizer when an "include" directive is found. This method
+ * will pass the {@code StructureTracker} of the current tokenizer down to
+ * the forked tokenizer.
+ *
+ * @param currentTokenizer Current tokenizer.
+ * @param fileSpec File specification of the included file.
+ * @param includeHandler Include handler.
+ * @return A tokenizer for the included file.
+ * @throws FileNotFoundException Error.
+ */
+ private static StreamingASTokenizer createForIncludeFile(
+ final StreamingASTokenizer currentTokenizer,
+ final IFileSpecification fileSpec,
+ final IncludeHandler includeHandler)
+ throws FileNotFoundException
+ {
+ final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
+ return tokenizer;
+ }
+
+ /**
+ * Create a tokenizer from a source file. This is the lexer entry-point used
+ * by {@link ASCompilationUnit}.
+ *
+ * @param fileSpec File specification provides the reader and the file path.
+ * @param includeHandler Include handler.
+ * @throws FileNotFoundException error
+ */
+ protected static StreamingASTokenizer create(
+ final IFileSpecification fileSpec,
+ final IncludeHandler includeHandler)
+ throws FileNotFoundException
+ {
+ assert fileSpec != null : "File specification can't be null.";
+ assert includeHandler != null : "Include handler can't be null.";
+
+ final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
+ tokenizer.setReader(fileSpec.createReader());
+ tokenizer.setPath(fileSpec.getPath());
+ tokenizer.includeHandler = includeHandler;
+ tokenizer.includeHandler.enterFile(tokenizer.sourcePath);
+ return tokenizer;
+ }
+
+ /**
+ * Create a tokenizer for {@code ASParser#parseFile()}.
+ *
+ * @param fileSpec File specification provides the reader and the file path.
+ * @param includeHandler Include handler.
+ * @param followIncludes True if included files are also parsed.
+ * @param includedFiles A list of included file paths.
+ * @return Lexer.
+ * @throws FileNotFoundException error
+ */
+ protected static StreamingASTokenizer createForASParser(
+ final IFileSpecification fileSpec,
+ final IncludeHandler includeHandler,
+ final boolean followIncludes,
+ final List<String> includedFiles)
+ throws FileNotFoundException
+ {
+ final StreamingASTokenizer tokenizer = create(fileSpec, includeHandler);
+ tokenizer.setFollowIncludes(followIncludes);
+
+ final ImmutableList.Builder<ASToken> imaginaryTokensBuilder =
+ new ImmutableList.Builder<ASToken>();
+ for (final String filename : includedFiles)
+ {
+ imaginaryTokensBuilder.add(new ASToken(
+ ASTokenTypes.TOKEN_KEYWORD_INCLUDE,
+ 0,
+ 0,
+ 0,
+ 0,
+ "include"));
+ imaginaryTokensBuilder.add(new ASToken(
+ ASTokenTypes.TOKEN_LITERAL_STRING,
+ 0,
+ 0,
+ 0,
+ 0,
+ '"' + filename + '"'));
+ }
+ tokenizer.ascIncludeImaginaryTokens = imaginaryTokensBuilder.build().iterator();
+ return tokenizer;
+ }
+
+ /**
+ * This creator doesn't "enter file" on creation.
+ */
+ protected static StreamingASTokenizer createForInlineScriptScopeBuilding(
+ final Reader reader,
+ final String path,
+ final IncludeHandler includeHandler,
+ final int offsetAdjustment,
+ final int lineAdjustment,
+ final int columnAdjustment)
+ {
+ assert reader != null : "Reader can't be null";
+ assert path != null : "Path can't be null";
+ assert includeHandler != null : "IncludeHandler can't be null";
+
+ final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
+ tokenizer.setReader(reader);
+ tokenizer.setPath(path);
+ tokenizer.includeHandler = includeHandler;
+ tokenizer.setSourcePositionAdjustment(
+ offsetAdjustment, lineAdjustment, columnAdjustment);
+ return tokenizer;
+ }
+
+ /**
+ * Create a tokenizer to parse an Expression.
+ */
+ protected static StreamingASTokenizer createForInlineExpressionParsing(
+ final Reader reader,
+ final String path
+ )
+ {
+ assert reader != null : "Reader can't be null";
+ assert path != null : "Path can't be null";
+
+ final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
+ tokenizer.setReader(reader);
+ tokenizer.setPath(path);
+ tokenizer.includeHandler.enterFile(path);
+
+ // Have to do this to get the tokenizer to work right - some things, like function expressions,
+ // won't tokenize correctly unless the last token is '=' or some other special tokens.
+ tokenizer.lastToken = new ASToken(ASTokenTypes.TOKEN_OPERATOR_ASSIGNMENT, -1, -1, -1, -1, "=");
+
+ return tokenizer;
+ }
+
+ /**
+ * This method can create a {@code StreamingASTokenizer} with optional
+ * "follow includes". If {@code IncludeHandler} is not null, it will follow
+ * {@code include} directives.
+ *
+ * @param reader Input to the tokenizer.
+ * @param path File path of the input.
+ * @param includeHandler If not null, the created tokenizer will follow
+ * {@code include} directives.
+ * @return A {@code StreamingASTokenizer}.
+ */
+ public static StreamingASTokenizer createForRepairingASTokenizer(
+ final Reader reader,
+ final String path,
+ final IncludeHandler includeHandler)
+ {
+ assert path != null || includeHandler == null : "We need a source path to follow includes";
+ final StreamingASTokenizer tokenizer = new StreamingASTokenizer();
+ tokenizer.setReader(reader);
+ tokenizer.setPath(path);
+ if (includeHandler != null)
+ {
+ tokenizer.includeHandler = includeHandler;
+ includeHandler.enterFile(path);
+ }
+ return tokenizer;
+ }
+
+ /**
+ * Sets the {@link Reader} that supplies the content to this tokenizer. It
+ * is up to the client to close any previous readers that have been in use.
+ * It is also up to the client to close the reader once it has been used
+ *
+ * @param reader a {@link Reader}
+ */
+ public void setReader(final Reader reader)
+ {
+ setReader(reader, 0, 0, 0);
+ }
+
+ /**
+ * Sets the {@link Reader} that supplies the content to this tokenizer. It
+ * is up to the client to close any previous readers that have been in use.
+ * It is also up to the client to close the reader once it has been used
+ *
+ * @param reader a {@link Reader}
+ * @param offset Offset adjustment. If the specified reader is reading from
+ * a string extracted from a source file, this should be the offset of the
+ * first character read from the reader in the source file.
+ * @param line Line adjustment.
+ * @param column Column adjustment
+ */
+ public void setReader(final Reader reader, int offset, int line, int column)
+ {
+ this.reader = reader;
+ tokenizer = new RawASTokenizer();
+ tokenizer.yyreset(reader);
+ tokenizer.setCollectComments(config.collectComments);
+ setSourcePositionAdjustment(offset, line, column);
+ }
+
+ /**
+ * Sets the path to the file this tokenizer is scanning
+ *
+ * @param path a file path
+ */
+ @Override
+ public void setPath(String path)
+ {
+ assert path != null : "path of tokenizer shouldn't be null";
+ sourcePath = path;
+ tokenizer.setSourcePath(path);
+ }
+
+ /**
+ * Allows for the adjustment of offset, line and column information when
+ * parsing subsequences of text. This should be called before tokenization
+ * has started
+ *
+ * @param offset The offset where the fragment starts.
+ * @param line The line where the fragment starts. This should be a
+ * ZERO-based line number
+ * @param column The column where the fragment starts. This should be a
+ * ZERO-based column number
+ */
+ public void setSourcePositionAdjustment(int offset, int line, int column)
+ {
+ offsetAdjustment = offset;
+ lineAdjustment = line;
+ columnAdjustment = column;
+ }
+
+ /**
+ * Sets whether comments are collected: single line and multi-line.
+ * Default is <code>false</code>
+ *
+ * @param collect true if we should collect comments
+ */
+ @Override
+ public void setCollectComments(final boolean collect)
+ {
+ config.collectComments = collect;
+
+ if (tokenizer != null)
+ tokenizer.setCollectComments(collect);
+ }
+
+ /**
+ * Sets whether we follow include statements, including their tokens.
+ * Default is <code>true</code>
+ *
+ * @param followIncludes true if we should follow includes
+ */
+ @Override
+ public void setFollowIncludes(final boolean followIncludes)
+ {
+ config.followIncludes = followIncludes;
+ }
+
+ /**
+ * Closes the underlying reader
+ */
+ @Override
+ public void close() throws IOException
+ {
+ if (tokenizer != null)
+ {
+ tokenizer.reset();
+ tokenizer.yyclose(); //close the reader
+ }
+ }
+
+ /**
+ * Sets whether we ignore keywords while scanning. Default is
+ * <code>false</code>
+ *
+ * @param ignore true if we should ignore keywords
+ */
+ public void setIgnoreKeywords(final boolean ignore)
+ {
+ config.ignoreKeywords = ignore;
+ }
+
+ /**
+ * Sets whether we are scanning a full file, or a fragment. Default is
+ * <code>true</code>
+ *
+ * @param full true if we are scanning a full file.
+ */
+ public void setScanningFullContent(final boolean full)
+ {
+ config.completeContent = full;
+ }
+
+ /**
+ * Sets whether we will find metadata constructs Default is
+ * <code>true</code>
+ *
+ * @param aware true if we will find metadata
+ */
+ public void setIsMetadataAware(final boolean aware)
+ {
+ config.findMetadata = aware;
+ }
+
+ /**
+ * Sets the {@link ITokenStreamFilter} used to filter out unwanted tokens
+ *
+ * @param filter the token filter to alter the stream returned from the
+ * tokenizer
+ */
+ public void setTokenFilter(ITokenStreamFilter filter)
+ {
+ config.filter = filter;
+ }
+
+ /**
+ * Sets the include handler used by this tokenizer to get
+ * {@link IFileSpecification} for included files.
+ *
+ * @param handler {@link IncludeHandler} this tokenizer should use.
+ */
+ public void setIncludeHandler(IncludeHandler handler)
+ {
+ includeHandler = handler;
+ }
+
+ /**
+ * Indicates whether we have tokenization problems. Can be called once scanning
+ * has begun
+ *
+ * @return true if problems have been encountered
+ */
+ public boolean hasTokenizationProblems()
+ {
+ return tokenizer.hasProblems() || problems.size() > 0;
+ }
+
+ /**
+ * Indicates whether this tokenizer has encountered include statements,
+ * regardless of whether it is set to follow them or not
+ *
+ * @return true if we have encountered includes
+ */
+ public boolean hasEncounteredIncludeStatements()
+ {
+ return hasEncounteredIncludeStatements;
+ }
+
+ /**
+ * Returns a collection of problems that have been encountered while
+ * scanning.
+ *
+ * @return a list of problems, never null
+ */
+ public List<ICompilerProblem> getTokenizationProblems()
+ {
+ ArrayList<ICompilerProblem> problems = new ArrayList<ICompilerProblem>(this.problems);
+ problems.addAll(tokenizer.getProblems());
+ return problems;
+ }
+
+ public ASToken[] getTokens(final Reader reader, ITokenStreamFilter filter)
+ {
+ setReader(reader);
+ List<ASToken> tokenList = initializeTokenList(reader);
+ ASToken token = null;
+ do
+ {
+ token = next();
+ if (token != null && filter.accept(token))
+ tokenList.add(token.clone()); //make a copy because of object pool
+ }
+ while (token != null);
+ return tokenList.toArray(new ASToken[0]);
+ }
+
+ @Override
+ public ASToken[] getTokens(final Reader reader)
+ {
+ if (config.filter != null)
+ return getTokens(reader, config.filter);
+ setReader(reader);
+ List<ASToken> tokenList = initializeTokenList(reader);
+ ASToken token = null;
+ do
+ {
+ token = next();
+ if (token != null)
+ tokenList.add(token.clone()); //copy ctor because of object pool
+ }
+ while (token != null);
+ return tokenList.toArray(new ASToken[0]);
+ }
+
+ /**
+ * @param reader
+ * @return
+ */
+ private List<ASToken> initializeTokenList(final Reader reader)
+ {
+ List<ASToken> tokenList;
+ int listSize = 8012;
+ if (reader instanceof NonLockingStringReader)
+ {
+ //we know the length of this string. For a string of length x, there are roughly x/5 tokens that
+ //can be constructed from that string. Size the array appropriately.
+ listSize = 5;
+ if (((NonLockingStringReader)reader).getLength() > 0)
+ {
+ listSize = Math.max((int)((NonLockingStringReader)reader).getLength() / 5, 5);
+ }
+
+ }
+ else if (reader instanceof ILengthAwareReader && ((ILengthAwareReader)reader).getInputType() == InputType.FILE)
+ {
+ listSize = 9;
+ if (((ILengthAwareReader)reader).getLength() > 0)
+ {
+ listSize = Math.max((int)((ILengthAwareReader)reader).getLength() / 9, 9);
+
+ }
+ }
+ tokenList = new ArrayList<ASToken>(listSize);
+ return tokenList;
+ }
+
+ @Override
+ public IASToken[] getTokens(final String range)
+ {
+ return getTokens(new NonLockingStringReader(range));
+ }
+
+ /**
+ * Returns the next token that can be produced from the underlying reader
+ *
+ * @param filter an {@link ITokenStreamFilter} to restrict the tokens that
+ * are returned
+ * @return an ASToken, or null if no more tokens can be produced
+ */
+ public final ASToken next(final ITokenStreamFilter filter)
+ {
+ ASToken retVal = null;
+ while (true)
+ {
+ retVal = next();
+ if (retVal == null || filter.accept(retVal))
+ {
+ break;
+ }
+ }
+ return retVal;
+ }
+
+ /**
+ * Returns the next token that can be produced from the underlying reader.
+ * <p>
+ * If the forked "include file tokenizer" is open (not null), return the
+ * next token from it. If the forked tokenizer reaches the end of the
+ * included file, close (set to null) the forked tokenizer and return token
+ * from the main source file.
+ *
+ * @return an ASToken, or null if no more tokens can be produced
+ */
+ public final ASToken next()
+ {
+ ASToken retVal = null;
+ // If the lexer for the included file is open, read from the included tokenizer.
+ boolean consumeSemi = false;
+ try
+ {
+ // Return token from the main file.
+ if (forkIncludeTokenizer != null)
+ {
+ retVal = forkIncludeTokenizer.next();
+
+ // Check if the forked tokenizer reached EOF.
+ if (retVal == null)
+ {
+ closeIncludeTokenizer();
+ // We should consume the next semicolon we find.
+ // Most include statements are terminated with a semicolon,
+ // and because we read the contents of the included file,
+ // this could cause problems with a semicolon in a place
+ // we don't want it.
+ consumeSemi = true;
+ }
+ else
+ return retVal;
+ }
+ if (bufferSize > 0)
+ {
+ retVal = lookAheadBuffer.remove(0);
+ bufferSize--;
+ }
+ else
+ {
+ retVal = nextTokenFromReader();
+
+ }
+ if (retVal == null)
+ return null;
+ final int tokenType = retVal.getType();
+
+ switch (tokenType)
+ {
+ // if we're seeing each in this part of the loop, it's not a
+ // syntactic keyword
+ // since we do lookahead when we see "for", checking for "each"
+ case TOKEN_RESERVED_WORD_EACH:
+ treatKeywordAsIdentifier(retVal);
+ processUserDefinedNamespace(retVal, 0);
+ return retVal;
+ case TOKEN_KEYWORD_INCLUDE:
+ {
+ // "followIncludes=false" is usually used for code model
+ // partitioner. They want the "include" token.
+ if (!config.followIncludes)
+ return retVal;
+
+ final ASToken token = LT(1);
+
+ // "include" at EOF is always a keyword
+ if (token == null)
+ return retVal;
+
+ if (!matches(token, TOKEN_LITERAL_STRING))
+ {
+ treatKeywordAsIdentifier(retVal); // it's an identifier
+ processUserDefinedNamespace(retVal, 0);
+ }
+ else
+ {
+ hasEncounteredIncludeStatements = true;
+ // Consume the file path after the include token.
+ consume(1);
+ final String filenameTokenText = token.getText();
+ final String includeString = filenameTokenText.substring(1, filenameTokenText.length() - 1);
+
+ if (sourcePath == null)
+ throw new NullPointerException("Source file is needed for resolving included file path.");
+ IFileSpecification includedFileSpec = null;
+ //respond to problems from our file handler
+ includedFileSpec = includeHandler.getFileSpecificationForInclude(sourcePath, includeString);
+ //
+ if (includedFileSpec == null)
+ {
+ ICompilerProblem problem = new FileNotFoundProblem(token, filenameTokenText); //the text will be the path not found
+ problems.add(problem);
+ retVal = next();
+ return retVal;
+ }
+ if (includeHandler.isCyclicInclude(includedFileSpec.getPath()))
+ {
+ ICompilerProblem problem = new CyclicalIncludesProblem(token);
+ problems.add(problem);
+ retVal = next();
+ return retVal;
+ }
+ else
+ {
+ // Fork a tokenizer for the included file
+ try
+ {
+ forkIncludeTokenizer = createForIncludeFile(this, includedFileSpec, includeHandler);
+ retVal = forkIncludeTokenizer.next();
+ }
+ catch (FileNotFoundException fnfe)
+ {
+ includeHandler.handleFileNotFound(includedFileSpec);
+ ICompilerProblem problem = new FileNotFoundProblem(token, includedFileSpec.getPath());
+ problems.add(problem);
+ retVal = next();
+ return retVal;
+ }
+ }
+ }
+
+ // Recover from compiler problems and continue.
+ if (retVal == null)
+ {
+ // Included file is empty.
+ closeIncludeTokenizer();
+ // Fall back to main source.
+ retVal = this.next();
+ }
+ return retVal;
+ }
+ case TOKEN_RESERVED_WORD_CONFIG:
+ if (matches(LT(1), TOKEN_RESERVED_WORD_NAMESPACE))
+ { //we have a config namespace
+ retVal.setType(TOKEN_RESERVED_WORD_CONFIG);
+ return retVal;
+ }
+ treatKeywordAsIdentifier(retVal); //identifier
+ processUserDefinedNamespace(retVal, 0);
+ return retVal;
+ case HIDDEN_TOKEN_BUILTIN_NS:
+ if (matches(LT(1), TOKEN_OPERATOR_NS_QUALIFIER))
+ { //we have public:: and this structure is not an annotation but a name ref
+ retVal.setType(TOKEN_NAMESPACE_NAME);
+ return retVal;
+ }
+ retVal.setType(TOKEN_NAMESPACE_ANNOTATION);
+ return retVal;
+ case TOKEN_MODIFIER_DYNAMIC:
+ case TOKEN_MODIFIER_FINAL:
+ case TOKEN_MODIFIER_NATIVE:
+ case TOKEN_MODIFIER_OVERRIDE:
+ case TOKEN_MODIFIER_STATIC:
+ case TOKEN_MODIFIER_VIRTUAL:
+ {
+ // previous token is either a modifier or a namespace, or if
+ // null, assume keyword
+ // next token is from a definition or a modifier or a namespace
+ final ASToken nextToken = LT(1);
+ if (nextToken != null)
+ {
+ switch (nextToken.getType())
+ {
+ case TOKEN_KEYWORD_CLASS:
+ case TOKEN_KEYWORD_FUNCTION:
+ case TOKEN_KEYWORD_INTERFACE:
+ case TOKEN_RESERVED_WORD_NAMESPACE:
+ case TOKEN_KEYWORD_VAR:
+ case TOKEN_KEYWORD_CONST:
+ case TOKEN_MODIFIER_DYNAMIC:
+ case TOKEN_MODIFIER_FINAL:
+ case TOKEN_MODIFIER_NATIVE:
+ case TOKEN_MODIFIER_OVERRIDE:
+ case TOKEN_MODIFIER_STATIC:
+ case TOKEN_MODIFIER_VIRTUAL:
+ case TOKEN_NAMESPACE_ANNOTATION:
+ case TOKEN_NAMESPACE_NAME:
+ case HIDDEN_TOKEN_BUILTIN_NS:
+ return retVal;
+ case TOKEN_IDENTIFIER:
+ if (isUserDefinedNamespace(nextToken, 1)) // we're already looking ahead one so make sure we look ahead one further
+ return retVal;
+ default:
+ // Not applicable to other token types.
+ break;
+ }
+ }
+ treatKeywordAsIdentifier(retVal);
+ processUserDefinedNamespace(retVal, 0);
+ return retVal;
+ }
+ //we combine +/- for numeric literals here
+ case TOKEN_OPERATOR_MINUS:
+ case TOKEN_OPERATOR_PLUS:
+ {
+ if (lastToken == null || !lastToken.canPreceedSignedOperator())
+ {
+ final ASToken nextToken = LT(1);
+ if (nextToken != null)
+ {
+ switch (nextToken.getType())
+ {
+ case TOKEN_LITERAL_NUMBER:
+ case TOKEN_LITERAL_HEX_NUMBER:
+ retVal.setEnd(nextToken.getEnd());
+ final StringBuilder builder = new StringBuilder(retVal.getText());
+ builder.append(nextToken.getText());
+ retVal.setText(poolString(builder.toString()));
+ consume(1);
+ retVal.setType(nextToken.getType());
+ break;
+ default:
+ // ignore other tokens
+ break;
+ }
+ }
+ }
+
+ return retVal;
+ }
+ //RECOGNIZE: for each
+ case TOKEN_KEYWORD_FOR:
+ {
+ final ASToken token = LT(1);
+ if (matches(token, TOKEN_RESERVED_WORD_EACH))
+ {
+ retVal.setEnd(token.getEnd());
+ retVal.setText(FOR_EACH);
+ consume(1);
+ return retVal;
+ }
+ if (lastToken != null)
+ {
+ int lastTokenType = lastToken.getType();
+ switch (lastTokenType)
+ {
+ case TOKEN_KEYWORD_VAR:
+ case TOKEN_KEYWORD_FUNCTION:
+ case TOKEN_RESERVED_WORD_GET:
+ case TOKEN_RESERVED_WORD_SET:
+ case TOKEN_OPERATOR_MEMBER_ACCESS:
+ retVal.setType(TOKEN_IDENTIFIER);
+ }
+ }
+ return retVal;
+ }
+ //RECOGNIZE: default xml namespace
+ //default xml namespace must exist on the same line
+ case TOKEN_KEYWORD_DEFAULT:
+ {
+ final ASToken maybeNS = LT(2);
+ final boolean foundTokenNamespace = maybeNS != null &&
+ maybeNS.getType() == TOKEN_RESERVED_WORD_NAMESPACE;
+ final ASToken maybeXML = LT(1);
+ if (foundTokenNamespace)
+ {
+ final boolean foundTokenXML = maybeXML != null &&
+ maybeXML.getType() == TOKEN_IDENTIFIER &&
+ XML.equals(maybeXML.getText());
+ if (!foundTokenXML)
+ {
+ final ICompilerProblem problem =
+ new ExpectXmlBeforeNamespaceProblem(maybeNS);
+ problems.add(problem);
+ }
+
+ //combine all of these tokens together
+ retVal.setEnd(maybeNS.getEnd());
+ retVal.setText(DEFAULT_XML_NAMESPACE);
+ retVal.setType(TOKEN_DIRECTIVE_DEFAULT_XML);
+ consume(2);
+ }
+ // if this isn't "default xml namespace" then
+ // see if it is the default case in a switch
+ // otherwise, assume it is an identifierName
+ else if (maybeXML != null &&
+ maybeXML.getType() != TOKEN_COLON)
+ retVal.setType(TOKEN_IDENTIFIER);
++ else if (lastToken != null)
++ {
++ int lastTokenType = lastToken.getType();
++ switch (lastTokenType)
++ {
++ case TOKEN_KEYWORD_VAR:
++ case TOKEN_KEYWORD_FUNCTION:
++ case TOKEN_RESERVED_WORD_GET:
++ case TOKEN_RESERVED_WORD_SET:
++ case TOKEN_OPERATOR_MEMBER_ACCESS:
++ retVal.setType(TOKEN_IDENTIFIER);
++ }
++ }
+ return retVal;
+ }
+ case TOKEN_KEYWORD_VOID:
+ {
+ //check for void 0
+ final ASToken token = LT(1);
+ if (matches(token, TOKEN_LITERAL_NUMBER) && ZERO.equals(token.getText()))
+ {
+ retVal.setType(TOKEN_VOID_0);
+ combineText(retVal, token);
+ consume(1);
+ }
+ //check for void(0)
+ else if (matches(token, TOKEN_PAREN_OPEN))
+ {
+ final ASToken zeroT = LT(2);
+ if (matches(zeroT, TOKEN_LITERAL_NUMBER) && ZERO.equals(zeroT.getText()))
+ {
+ final ASToken closeParenT = LT(3);
+ if (matches(closeParenT, TOKEN_PAREN_CLOSE))
+ {
+ combineText(retVal, token);
+ combineText(retVal, zeroT);
+ combineText(retVal, closeParenT);
+ retVal.setType(TOKEN_VOID_0);
+ consume(3);
+ }
+ }
+ }
+ return retVal;
+ }
+ case TOKEN_IDENTIFIER:
+ {
+ //check for user-defined namespace before we return anything
+ processUserDefinedNamespace(retVal, 0);
+ return retVal;
+ }
+ //this is for metadata processing
+ case TOKEN_SQUARE_OPEN:
+ {
+ retVal = tryParseMetadata(retVal);
+ return retVal;
+ }
+ case HIDDEN_TOKEN_STAR_ASSIGNMENT:
+ {
+ //this is to solve an ambiguous case, where we can't tell the difference between
+ //var foo:*=null and foo *= null;
+ retVal.setType(TOKEN_OPERATOR_STAR);
+ retVal.setEnd(retVal.getEnd() - 1);
+ retVal.setText("*");
+ //add the equals
+ final ASToken nextToken = tokenizer.buildToken(TOKEN_OPERATOR_ASSIGNMENT,
+ retVal.getEnd() + 1, retVal.getEnd() + 2,
+ retVal.getLine(), retVal.getColumn(), "=");
+ nextToken.setSourcePath(sourcePath);
+ addTokenToBuffer(nextToken);
+ return retVal;
+ }
+ case TOKEN_SEMICOLON:
+ if (consumeSemi)
+ {
+ return next();
+ }
+ return retVal;
+ case TOKEN_VOID_0:
+ case TOKEN_LITERAL_REGEXP:
+ case TOKEN_COMMA:
+ case TOKEN_COLON:
+ case TOKEN_PAREN_OPEN:
+ case TOKEN_PAREN_CLOSE:
+ case TOKEN_SQUARE_CLOSE:
+ case TOKEN_ELLIPSIS:
+ case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
+ case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
+ case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
+ case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
+ case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
+ case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
+ case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
+ case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
+ case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
+ case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
+ case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
+ case TOKEN_OPERATOR_STAR:
+ case TOKEN_OPERATOR_NS_QUALIFIER:
+ case TOKEN_ASDOC_COMMENT:
+ case TOKEN_OPERATOR_DIVISION:
+ case TOKEN_OPERATOR_MODULO:
+ case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
+ case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
+ case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
+ case TOKEN_OPERATOR_LESS_THAN:
+ case TOKEN_OPERATOR_GREATER_THAN:
+ case TOKEN_OPERATOR_LESS_THAN_EQUALS:
+ case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
+ case TOKEN_OPERATOR_EQUAL:
+ case TOKEN_OPERATOR_NOT_EQUAL:
+ case TOKEN_OPERATOR_STRICT_EQUAL:
+ case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
+ case TOKEN_OPERATOR_BITWISE_AND:
+ case TOKEN_OPERATOR_BITWISE_XOR:
+ case TOKEN_OPERATOR_BITWISE_OR:
+ case TOKEN_OPERATOR_LOGICAL_AND:
+ case TOKEN_OPERATOR_LOGICAL_OR:
+ case TOKEN_OPERATOR_LOGICAL_AND_ASSIGNMENT:
+ case TOKEN_OPERATOR_LOGICAL_OR_ASSIGNMENT:
+ case TOKEN_TYPED_COLLECTION_OPEN:
+ case TOKEN_TYPED_COLLECTION_CLOSE:
+ case TOKEN_OPERATOR_MEMBER_ACCESS:
+ case TOKEN_RESERVED_WORD_NAMESPACE:
+ case TOKEN_RESERVED_WORD_GET:
+ case TOKEN_RESERVED_WORD_SET:
+ case TOKEN_OPERATOR_ASSIGNMENT:
+ case TOKEN_TYPED_LITERAL_CLOSE:
+ case TOKEN_TYPED_LITERAL_OPEN:
+ case TOKEN_OPERATOR_TERNARY:
+ case TOKEN_OPERATOR_DECREMENT:
+ case TOKEN_OPERATOR_INCREMENT:
+ case TOKEN_OPERATOR_ATSIGN:
+ case TOKEN_OPERATOR_BITWISE_NOT:
+ case TOKEN_OPERATOR_LOGICAL_NOT:
+ case TOKEN_E4X_BINDING_CLOSE:
+ case TOKEN_E4X_BINDING_OPEN:
+ case TOKEN_OPERATOR_DESCENDANT_ACCESS:
+ case TOKEN_NAMESPACE_ANNOTATION:
+ case TOKEN_NAMESPACE_NAME:
+ case TOKEN_BLOCK_OPEN:
+ case TOKEN_BLOCK_CLOSE:
+ case TOKEN_KEYWORD_FUNCTION:
+ return retVal;
+ case HIDDEN_TOKEN_MULTI_LINE_COMMENT:
+ case HIDDEN_TOKEN_SINGLE_LINE_COMMENT:
+ if (tokenizer.isCollectingComments())
+ {
+ return retVal;
+ }
+ assert (false);
+ return null;
+ case TOKEN_KEYWORD_INSTANCEOF:
+ case TOKEN_KEYWORD_AS:
+ case TOKEN_KEYWORD_IN:
+ case TOKEN_KEYWORD_IS:
+ if (lastToken != null)
+ {
+ int lastTokenType = lastToken.getType();
+ switch (lastTokenType)
+ {
+ case TOKEN_SEMICOLON:
+ case TOKEN_BLOCK_OPEN:
+ case TOKEN_COMMA:
+ retVal.setType(TOKEN_IDENTIFIER);
+ return retVal;
+ }
+ }
+ else
+ {
+ // we are first token so assume identifier
+ retVal.setType(TOKEN_IDENTIFIER);
+ return retVal;
+ }
+ // and fall through
+ case TOKEN_KEYWORD_DELETE:
+ ASToken nextToken = LT(1);
+ if (nextToken != null)
+ {
+ int nextTokenType = nextToken.getType();
+ switch (nextTokenType)
+ {
+ // if followed by a token assume it is the
+ // keyword and not the identifierName;
+ case TOKEN_IDENTIFIER:
+ return retVal;
+ // followed by a comma or semicolon
+ // probably being used in an expression
+ case TOKEN_COMMA:
+ case TOKEN_SEMICOLON:
+ retVal.setType(TOKEN_IDENTIFIER);
+ return retVal;
+ }
+ }
+ // and fall through
+ case TOKEN_KEYWORD_BREAK:
+ case TOKEN_KEYWORD_CASE:
+ case TOKEN_KEYWORD_CATCH:
+ case TOKEN_KEYWORD_CLASS:
+ case TOKEN_KEYWORD_CONST:
+ case TOKEN_KEYWORD_CONTINUE:
+ case TOKEN_KEYWORD_DO:
+ case TOKEN_KEYWORD_ELSE:
+ case TOKEN_KEYWORD_FALSE:
+ case TOKEN_KEYWORD_FINALLY:
+ case TOKEN_KEYWORD_IF:
+ case TOKEN_KEYWORD_IMPORT:
+ case TOKEN_KEYWORD_INTERFACE:
+ case TOKEN_KEYWORD_NULL:
+ case TOKEN_KEYWORD_PACKAGE:
+ case TOKEN_KEYWORD_SUPER:
+ case TOKEN_KEYWORD_SWITCH:
+ case TOKEN_KEYWORD_THIS:
+ case TOKEN_KEYWORD_TRUE:
+ case TOKEN_KEYWORD_TRY:
+ case TOKEN_KEYWORD_TYPEOF:
+ case TOKEN_KEYWORD_USE:
+ case TOKEN_KEYWORD_VAR:
+ case TOKEN_KEYWORD_WHILE:
+ case TOKEN_KEYWORD_WITH:
+ case TOKEN_KEYWORD_RETURN:
+ case TOKEN_KEYWORD_THROW:
+ case TOKEN_KEYWORD_NEW:
+ if (lastToken != null)
+ {
+ int lastTokenType = lastToken.getType();
+ switch (lastTokenType)
+ {
+ case TOKEN_KEYWORD_VAR:
+ case TOKEN_KEYWORD_FUNCTION:
+ case TOKEN_RESERVED_WORD_GET:
+ case TOKEN_RESERVED_WORD_SET:
+ case TOKEN_OPERATOR_MEMBER_ACCESS:
+ retVal.setType(TOKEN_IDENTIFIER);
+ }
+ }
+ return retVal;
+ default:
+ if (ASToken.isE4X(tokenType))
+ return retVal;
+
+ if (retVal.isKeywordOrContextualReservedWord() || retVal.isLiteral())
+ return retVal;
+
+ // If we reach here, the token fails to match any processing logic.
+ final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(
+ retVal,
+ ASTokenKind.UNKNOWN);
+ problems.add(problem);
+ }
+ }
+ catch (final Exception e)
+ {
+ if (lastException != null)
+ {
+ if (lastException.getClass().isInstance(e))
+ {
+ ICompilerProblem problem = new InternalCompilerProblem2(sourcePath, e, "StreamingASTokenizer");
+ problems.add(problem);
+ return null;
+ }
+ }
+ else
+ {
+ lastException = e;
+ retVal = null;
+ return next();
+ }
+ }
+ finally
+ {
+ consumeSemi = false;
+ lastToken = retVal;
+ }
+ return null;
+ }
+
+ /**
+ * Error recovery: convert the given keyword token into an identifier token,
+ * and log a syntax error.
+ *
+ * @param token Keyword token.
+ */
+ private void treatKeywordAsIdentifier(final ASToken token)
+ {
+ assert token != null : "token can't be null";
+ assert token.isKeywordOrContextualReservedWord() : "only transfer reserved words";
+
+ if (token.isKeyword())
+ {
+ final UnexpectedTokenProblem problem = new UnexpectedTokenProblem(token, ASTokenKind.IDENTIFIER);
+ problems.add(problem);
+ }
+ token.setType(TOKEN_IDENTIFIER);
+ }
+
    /**
     * Decide within the current context whether the following content can be
     * parsed as a metadata tag token. The decision is based purely on the
     * type of the previously returned token ({@code lastToken}).
     *
     * @param nextToken The next token coming from
     * {@link #nextTokenFromReader()}; expected to be a "[" token.
     * @return If the following content can be a metadata tag, the result is a
     * token of type {@link ASTokenTypes#TOKEN_ATTRIBUTE}. Otherwise, the
     * argument {@code nextToken} is returned.
     * @throws Exception Parsing error.
     */
    private ASToken tryParseMetadata(ASToken nextToken) throws Exception
    {
        // Do not initialize this variable so that Java flow-analysis can check if
        // the following rules cover all the possibilities.
        final boolean isNextMetadata;

        if (!config.findMetadata)
        {
            // The lexer is configured to not recognize metadata.
            isNextMetadata = false;
        }
        else if (lastToken == null)
        {
            // An "[" at the beginning of a script is always a part of a metadata.
            isNextMetadata = true;
        }
        else
        {
            switch (lastToken.getType())
            {
                case TOKEN_ASDOC_COMMENT:
                case TOKEN_SEMICOLON:
                case TOKEN_ATTRIBUTE:
                case TOKEN_BLOCK_OPEN:
                    // "[" after these tokens are always part of a metadata token.
                    isNextMetadata = true;
                    break;

                case TOKEN_SQUARE_CLOSE:
                case TOKEN_IDENTIFIER:
                    // "[" following a "]" is an array access.
                    // "[" following an identifier is an array access.
                    isNextMetadata = false;
                    break;

                case TOKEN_KEYWORD_INCLUDE:
                case TOKEN_BLOCK_CLOSE:
                case TOKEN_OPERATOR_STAR:
                    // "[" after these tokens are part of a metadata token, if
                    // the "[" is on a new line.
                    isNextMetadata = !lastToken.matchesLine(nextToken);
                    break;

                default:
                    // If we are lexing an entire file
                    // then at this point we "know" that the next token
                    // is not meta-data.
                    if (config.completeContent)
                    {
                        isNextMetadata = false;
                    }
                    else
                    {
                        // In "fragment" mode which is used by the syntax coloring code
                        // in builder, we assume the following list of tokens can not
                        // precede meta-data because they all start or occur in expressions.
                        switch (lastToken.getType())
                        {
                            case TOKEN_OPERATOR_EQUAL:
                            case TOKEN_OPERATOR_TERNARY:
                            case TOKEN_COLON:
                            case TOKEN_OPERATOR_PLUS:
                            case TOKEN_OPERATOR_MINUS:
                            case TOKEN_OPERATOR_STAR:
                            case TOKEN_OPERATOR_DIVISION:
                            case TOKEN_OPERATOR_MODULO:
                            case TOKEN_OPERATOR_BITWISE_AND:
                            case TOKEN_OPERATOR_BITWISE_OR:
                            case TOKEN_KEYWORD_AS:
                            case TOKEN_OPERATOR_BITWISE_XOR:
                            case TOKEN_OPERATOR_LOGICAL_AND:
                            case TOKEN_OPERATOR_LOGICAL_OR:
                            case TOKEN_PAREN_OPEN:
                            case TOKEN_COMMA:
                            case TOKEN_OPERATOR_BITWISE_NOT:
                            case TOKEN_OPERATOR_LOGICAL_NOT:
                            case TOKEN_OPERATOR_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT:
                            case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT:
                            case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT:
                            case TOKEN_OPERATOR_LESS_THAN:
                            case TOKEN_OPERATOR_GREATER_THAN:
                            case TOKEN_OPERATOR_LESS_THAN_EQUALS:
                            case TOKEN_OPERATOR_GREATER_THAN_EQUALS:
                            case TOKEN_OPERATOR_NOT_EQUAL:
                            case TOKEN_OPERATOR_STRICT_EQUAL:
                            case TOKEN_OPERATOR_STRICT_NOT_EQUAL:
                            case TOKEN_OPERATOR_PLUS_ASSIGNMENT:
                            case TOKEN_OPERATOR_MINUS_ASSIGNMENT:
                            case TOKEN_OPERATOR_MULTIPLICATION_ASSIGNMENT:
                            case TOKEN_OPERATOR_DIVISION_ASSIGNMENT:
                            case TOKEN_OPERATOR_MODULO_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_AND_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_OR_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_XOR_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_LEFT_SHIFT_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_RIGHT_SHIFT_ASSIGNMENT:
                            case TOKEN_OPERATOR_BITWISE_UNSIGNED_RIGHT_SHIFT_ASSIGNMENT:
                                isNextMetadata = false;
                                break;
                            default:
                                isNextMetadata = true;
                                break;
                        }
                    }
                    break;
            }
        }

        final ASToken result;
        if (isNextMetadata)
            result = consumeMetadata(nextToken);
        else
            result = nextToken;

        return result;
    }
+
+ /**
+ * Close the forked include file tokenizer, and set it to null.
+ */
+ private void closeIncludeTokenizer()
+ {
+ if (forkIncludeTokenizer == null)
+ return;
+
+ try
+ {
+ problems.addAll(forkIncludeTokenizer.problems);
+ forkIncludeTokenizer.close();
+ }
+ catch (IOException e)
+ {
+ throw new RuntimeException(e);
+ }
+ includeHandler.leaveFile(forkIncludeTokenizer.getEndOffset());
+ forkIncludeTokenizer = null;
+ }
+
    /**
     * Greedily consume tokens as a metadata payload starting at the given
     * "[" token. If a token that cannot appear inside metadata is seen
     * before the closing "]", the consumed tokens are pushed back into the
     * lookahead buffer and the original "[" token is returned instead.
     *
     * @param startToken the opening "[" token.
     * @return a {@link MetaDataPayloadToken} covering the whole metadata tag,
     * or a copy of the original "[" token when the content turned out not to
     * be metadata.
     * @throws Exception lexer error while looking ahead.
     */
    private final ASToken consumeMetadata(final ASToken startToken) throws Exception
    {
        final ASToken originalToken = new ASToken(startToken);
        MetaDataPayloadToken payload = new MetaDataPayloadToken(originalToken);
        final ArrayList<ASToken> safetyNet = new ArrayList<ASToken>(5);
        boolean isMetadata = true;
        while (true)
        {
            tokenizer.setReuseLastToken();
            final ASToken next = LT(1);
            if (next == null)
            {
                // EOF: keep whatever we collected as metadata.
                break;
            }
            safetyNet.add(new ASToken(next)); //sadly, we have to deal with the extra object creation if we're wrong
            payload.addToken(next); //here too

            if (!next.canExistInMetadata())
            {
                isMetadata = false;
                //consume the last token we saw so that we don't get ourselves into an infinite loop
                //it was the last token of the metadata, and this makes "next" the current token.
                consume(1);
                break;
            }
            consume(1);
            if (next.getType() == TOKEN_SQUARE_CLOSE)
            {
                // Closing "]" terminates the metadata tag.
                break;
            }
        }
        if (!isMetadata)
        { //we're wrong, so let's add back the tokens to our lookahead buffer
            lookAheadBuffer.addAll(safetyNet);
            bufferSize = lookAheadBuffer.size();
            return originalToken;
        }
        return payload;

    }
+
+ private final void fill(final int distance) throws Exception
+ {
+ int pos = 0;
+ while (pos < distance)
+ {
+ addTokenToBuffer(nextTokenFromReader());
+ pos++;
+ }
+ }
+
+ /**
+ * @param nextToken
+ */
+ private final void addTokenToBuffer(final ASToken nextToken)
+ {
+ bufferSize++;
+ lookAheadBuffer.add(nextToken);
+ // at EOF, nextToken can be null.
+ if (nextToken != null)
+ nextToken.lock();
+ }
+
+ /**
+ * Get the pooled version of a given string.
+ *
+ * @param text String literal.
+ * @return Pooled string.
+ */
+ private final String poolString(final String text)
+ {
+ String pooledString = stringPool.get(text);
+ if (pooledString == null)
+ {
+ stringPool.put(text, text);
+ pooledString = text;
+ }
+ return pooledString;
+ }
+
    /**
     * Get the next token from the source input. If this tokenizer is created
     * for a source file by {@link ASC}, and there are files included by
     * {@code -in} option, the tokenizer will return the
     * "injected include tokens" before real tokens coming from the JFlex
     * generated tokenizer. Also interns token text, performs keyword
     * analysis, decodes unicode escapes, and adjusts token locations.
     *
     * @return next token from the source input; null at end of input.
     * @throws IOException error
     * @see ASCompilationUnit#createMainCompilationUnitForASC()
     */
    private final ASToken nextTokenFromReader() throws IOException
    {
        final ASToken nextToken;
        // Priority: injected ASC "-in" include tokens, then any token the raw
        // tokenizer buffered, then a fresh token from the lexer.
        if (ascIncludeImaginaryTokens != null && ascIncludeImaginaryTokens.hasNext())
            nextToken = ascIncludeImaginaryTokens.next();
        else if (tokenizer.hasBufferToken())
            nextToken = tokenizer.getBufferToken();
        else
            nextToken = tokenizer.nextToken();

        if (nextToken != null)
        {
            // Converting unicode on-the-fly in the lexer is much slower than
            // converting it here after the token is made, especially for
            // identifiers.
            switch (nextToken.getType())
            {
                case TOKEN_LITERAL_NUMBER:
                    nextToken.setText(poolString(nextToken.getText()));
                    break;
                case TOKEN_LITERAL_REGEXP:
                    // Any "backslash-u" entities left after "convertUnicode"
                    // are invalid unicode escape sequences. According to AS3
                    // behavior, the backslash character is dropped.
                    nextToken.setText(poolString(convertUnicode(nextToken.getText()).replaceAll("\\\\u", "u")));
                    break;
                case TOKEN_IDENTIFIER:
                    // Intern 'identifiers' and 'keywords'.
                    // 'keywords' were 'identifiers' before they are analyzed.
                    final String originalIdentifierName = nextToken.getText();
                    final String normalizedIdentifierName = poolString(convertUnicode(originalIdentifierName));
                    nextToken.setText(normalizedIdentifierName);
                    if (!config.ignoreKeywords)
                    {
                        /**
                         * If the identifier has escaped unicode sequence, it
                         * can't be a keyword.
                         * <p>
                         * According to ASL syntax spec chapter 3.4:
                         * <blockquote> Unicode escape sequences may be used to
                         * spell the names of identifiers that would otherwise
                         * be keywords. This is in contrast to ECMAScript.
                         * </blockquote>
                         */
                        if (originalIdentifierName.equals(normalizedIdentifierName))
                        {
                            // do keyword analysis here
                            final Integer info = keywordToTokenMap.get(nextToken.getText());
                            if (info != null)
                                nextToken.setType(info);
                        }
                    }
                    break;
                default:
                    // Ignore other tokens.
                    break;
            }

            //so we want to adjust all of our offsets here, BUT
            //the column is really only valid for the first line, which is line 0.
            //if we're not the first line, don't bother
            nextToken.adjustLocation(
                    offsetAdjustment,
                    lineAdjustment,
                    nextToken.getLine() == 0 ? columnAdjustment : 0);
            nextToken.storeLocalOffset();

            if (includeHandler != null)
            {
                nextToken.setSourcePath(includeHandler.getIncludeStackTop());
                includeHandler.onNextToken(nextToken);
            }

            // Tokens not attributed to an include file inherit this
            // tokenizer's source path.
            if (nextToken.getSourcePath() == null)
                nextToken.setSourcePath(sourcePath);

            if (reader instanceof SourceFragmentsReader)
                ((SourceFragmentsReader)reader).adjustLocation(nextToken);
        }
        return nextToken;
    }
+
    /**
     * Consume tokens in the buffer.
     * <p>
     * NOTE(review): tokens are removed from the TAIL of the lookahead buffer
     * (index {@code bufferSize - 1}), not the head. For a full consume
     * ({@code distance == bufferSize}) the result is the same either way;
     * confirm the intended semantics before relying on a partial consume.
     * A request larger than the buffer is silently ignored.
     *
     * @param distance the number of tokens to consume
     */
    private final void consume(int distance)
    {
        if (bufferSize >= distance)
        {
            for (; distance > 0; distance--)
            {
                lookAheadBuffer.remove(bufferSize - 1);
                bufferSize--;
            }
        }
    }
+
+ /**
+ * Returns the next token that will be produced by the underlying lexer
+ *
+ * @param distance distance to look ahead
+ * @return an {@link ASToken}
+ * @throws Exception
+ */
+ private final ASToken LT(final int distance) throws Exception
+ {
+ if (bufferSize < distance)
+ {
+ fill(distance - bufferSize);
+ }
+ return lookAheadBuffer.get(distance - 1);
+ }
+
+ private static final boolean matches(final ASToken token, final int type)
+ {
+ return token != null && token.getType() == type;
+ }
+
    /**
     * Retrieve the end offset of the file.
     * <p>
     * The result is the end offset of the file, not the offset of the last
     * token, this allows any trailing space to be included so that the parser
     * can span the result {@code FileNode} to the entire file.
     *
     * @return the end offset of the input file, including the global
     * {@code offsetAdjustment} applied to all tokens.
     */
    public final int getEndOffset()
    {
        return tokenizer.getOffset() + offsetAdjustment;
    }
+
+ /**
+ * Computers whether the following token is a user-defined namespace. This
+ * method calls processUserDefinedNamespace which will change token types
+ *
+ * @param token token to start our analysis
+ * @param lookaheadOffset offset of the tokens to look at
+ * @return true if we're a user-defined namespace
+ * @throws Exception
+ */
+ private final boolean isUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
+ {
+ processUserDefinedNamespace(token, lookaheadOffset);
+ return token.getType() == TOKEN_NAMESPACE_ANNOTATION || token.getType() == TOKEN_NAMESPACE_NAME;
+ }
+
    /**
     * Because AS3 supports qualified/unqualified namespaces as decorators on
     * definitions, we need to detect them before we even make it to the parser.
     * These look exactly like names/qnames, and so if they're on the same line
     * as a definition they might be a namespace name instead of a standard
     * identifier. This method will detect these cases, and change token types
     * accordingly (to {@code TOKEN_NAMESPACE_ANNOTATION} or
     * {@code TOKEN_NAMESPACE_NAME}).
     *
     * @param token token token to start our analysis
     * @param lookaheadOffset offset of the tokens to look at
     * @throws Exception lexer error during lookahead
     */
    private final void processUserDefinedNamespace(final ASToken token, final int lookaheadOffset) throws Exception
    {
        token.lock();

        //determine if we have a user-defined namespace
        //our first token will be an identifier, and the cases we're looking for are:
        //1.) user_namespace (function|var|dynamic|static|final|native|override)
        //2.) my.pack.user_namespace (function|var|dynamic|static|final|native|override)
        //option number 1 is probably the 99% case so optimize for it
        ASToken nextToken = LT(1 + lookaheadOffset);
        // A namespace decorator must be on the same line as what follows it.
        if (token.matchesLine(nextToken))
        {
            // If the next token is an identifier check to see if it should
            // be modified to a TOKEN_NAMESPACE_ANNOTATION
            // This is so that code like:
            // ns1 ns2 var x;
            // gets parsed correctly (2 namespace annotations, which is an error)
            if (nextToken.getType() == TOKEN_IDENTIFIER)
                processUserDefinedNamespace(nextToken, 1 + lookaheadOffset);

            switch (nextToken.getType())
            {
                case TOKEN_KEYWORD_FUNCTION:
                case TOKEN_KEYWORD_VAR:
                case TOKEN_KEYWORD_CONST:
                case TOKEN_RESERVED_WORD_NAMESPACE:
                case TOKEN_MODIFIER_DYNAMIC:
                case TOKEN_MODIFIER_FINAL:
                case TOKEN_MODIFIER_NATIVE:
                case TOKEN_MODIFIER_OVERRIDE:
                case TOKEN_MODIFIER_STATIC:
                case TOKEN_MODIFIER_VIRTUAL:
                case TOKEN_KEYWORD_CLASS:
                case TOKEN_KEYWORD_INTERFACE:
                case TOKEN_NAMESPACE_ANNOTATION:
                case HIDDEN_TOKEN_BUILTIN_NS:
                    // Followed by a definition keyword, modifier, or another
                    // namespace annotation: this is case 1 above.
                    token.setType(TOKEN_NAMESPACE_ANNOTATION);
                    return;
                case TOKEN_OPERATOR_NS_QUALIFIER: //simple name with a :: binding after it. has to be a NS
                    token.setType(TOKEN_NAMESPACE_NAME);
                    return;
            }
            // Case 2 above: walk a dotted name of alternating identifier and
            // "." tokens, buffering them until we can tell what they are.
            if (nextToken.getType() == TOKEN_OPERATOR_MEMBER_ACCESS)
            {
                int nextValidPart = TOKEN_IDENTIFIER;
                final ArrayList<ASToken> toTransform = new ArrayList<ASToken>(3);
                toTransform.add(token);
                toTransform.add(nextToken);
                int laDistance = lookaheadOffset + 1;
                while (true)
                {
                    nextToken = LT(++laDistance);
                    if (token.matchesLine(nextToken))
                    {
                        if (nextToken.getType() == nextValidPart)
                        {
                            // Alternate the expected token type between
                            // identifier and member-access.
                            nextValidPart = (nextToken.getType() == TOKEN_IDENTIFIER) ? TOKEN_OPERATOR_MEMBER_ACCESS : TOKEN_IDENTIFIER;
                            toTransform.add(nextToken);
                        }
                        else if (nextValidPart != TOKEN_IDENTIFIER && nextToken.canFollowUserNamespace())
                        {
                            // Next token is in the follow set of a namespace,
                            // so all the buffered tokens need to be converted
                            // into namespace tokens.
                            for (final ASToken ttToken : toTransform)
                            {
                                if (ttToken.getType() == TOKEN_IDENTIFIER)
                                    ttToken.setType(TOKEN_NAMESPACE_ANNOTATION);
                                else
                                    ttToken.setType(TOKEN_OPERATOR_MEMBER_ACCESS);
                            }
                            break;
                        }
                        else
                        {
                            break;
                        }
                    }
                    else
                    {
                        break;
                    }
                }
            }
        }
    }
+
+ /**
+ * Combines the text of two tokens, adding whitespace between them and
+ * adjusting offsets appropriately
+ *
+ * @param target the base token that we will add the next to
+ * @param source the source of the text to add
+ */
+ private final void combineText(TokenBase target, TokenBase source)
+ {
+ StringBuilder text = new StringBuilder();
+ text.append(target.getText());
+ //add whitespace for gaps between tokens
+ for (int i = 0; i < (source.getStart() - target.getEnd()); i++)
+ {
+ text.append(" ");
+ }
+ text.append(source.getText());
+ target.setText(poolString(text.toString()));
+ target.setEnd(target.getStart() + text.length());
+ }
+
    /**
     * Compiled unicode-escape pattern for sequences like {@code \u0000}.
     * Cached as a constant because Pattern compilation is expensive.
     */
    private static final Pattern UNICODE_PATTERN = Pattern.compile(BaseRawASTokenizer.PATTERN_U4);

    /**
     * Leading characters of a unicode pattern; used as a cheap pre-filter
     * before running the RegEx in {@link #convertUnicode(String)}.
     */
    private static final String UNICODE_LEADING_CHARS = "\\u";
+
+ /**
+ * Convert escaped unicode sequence in a string. For example:
+ * {@code foo\u0051bar} is converted into {@code fooQbar}.
+ *
+ * @param text input string
+ * @return converted text
+ */
+ static String convertUnicode(final String text)
+ {
+ // Calling Pattern.matcher() is much slower than String.contains(), so
+ // we need this predicate to skip unnecessary RegEx computation.
+ if (text.contains(UNICODE_LEADING_CHARS))
+ {
+ final StringBuilder result = new StringBuilder();
+ final Matcher matcher = UNICODE_PATTERN.matcher(text);
+ int start = 0;
+ while (matcher.find())
+ {
+ result.append(text, start, matcher.start());
+ result.append(Character.toChars(BaseRawASTokenizer.decodeEscapedUnicode(matcher.group())));
+ start = matcher.end();
+ }
+ result.append(text, start, text.length());
+ return result.toString();
+ }
+ else
+ {
+ return text;
+ }
+ }
+
    /**
     * Gets the source path to the file being tokenized.
     *
     * @return the source path string held by this tokenizer.
     */
    public String getSourcePath()
    {
        return sourcePath;
    }
+}