You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/09/11 21:30:45 UTC
[14/50] [abbrv] lucenenet git commit: Ported QueryParser.Simple namespace + tests.

Ported QueryParser.Simple namespace + tests.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6224f3e2
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6224f3e2
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6224f3e2

Branch: refs/heads/master
Commit: 6224f3e295c61defce4a5c6dfd5dd9458b030777
Parents: e45f328
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Mon Aug 1 15:19:34 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Fri Sep 2 22:30:34 2016 +0700

----------------------------------------------------------------------
 .../Lucene.Net.QueryParser.csproj               |   1 +
 .../Simple/SimpleQueryParser.cs                 | 788 +++++++++++++++++++
 .../Lucene.Net.Tests.QueryParser.csproj         |   1 +
 .../Simple/TestSimpleQueryParser.cs             | 728 +++++++++++++++++
 4 files changed, 1518 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6224f3e2/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj b/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
index b42ed22..646e931 100644
--- a/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
+++ b/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
@@ -57,6 +57,7 @@
     <Compile Include="Ext\ParserExtension.cs" />
     <Compile Include="Flexible\Standard\CommonQueryParserConfiguration.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Simple\SimpleQueryParser.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\src\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6224f3e2/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs b/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs
new file mode 100644
index 0000000..8607d27
--- /dev/null
+++ b/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs
@@ -0,0 +1,788 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Lucene.Net.QueryParser.Simple
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// SimpleQueryParser is used to parse human readable query syntax.
+    /// <p>
+    /// The main idea behind this parser is that a person should be able to type
+    /// whatever they want to represent a query, and this parser will do its best
+    /// to interpret what to search for no matter how poorly composed the request
+    /// may be. Tokens are considered to be any of a term, phrase, or subquery for the
+    /// operations described below.  Whitespace including ' ' '\n' '\r' and '\t'
+    /// and certain operators may be used to delimit tokens ( ) + | " .
+    /// <p>
+    /// Any errors in query syntax will be ignored and the parser will attempt
+    /// to decipher what it can; however, this may mean odd or unexpected results.
+    /// <h4>Query Operators</h4>
+    /// <ul>
+    ///  <li>'{@code +}' specifies {@code AND} operation: <tt>token1+token2</tt>
+    ///  <li>'{@code |}' specifies {@code OR} operation: <tt>token1|token2</tt>
+    ///  <li>'{@code -}' negates a single token: <tt>-token0</tt>
+    ///  <li>'{@code "}' creates phrases of terms: <tt>"term1 term2 ..."</tt>
+    ///  <li>'{@code *}' at the end of terms specifies prefix query: <tt>term*</tt>
+    ///  <li>'{@code ~}N' at the end of terms specifies fuzzy query: <tt>term~1</tt>
+    ///  <li>'{@code ~}N' at the end of phrases specifies near query: <tt>"term1 term2"~5</tt>
+    ///  <li>'{@code (}' and '{@code )}' specifies precedence: <tt>token1 + (token2 | token3)</tt>
+    /// </ul>
+    /// <p>
+    /// The default operator (see <see cref="DefaultOperator"/>) is {@code OR} if no other operator is specified.
+    /// For example, the following will {@code OR} {@code token1} and {@code token2} together:
+    /// <tt>token1 token2</tt>
+    /// <p>
+    /// Normal operator precedence will be simple order from left to right.
+    /// For example, the following will evaluate {@code token1 OR token2} first,
+    /// then {@code AND} with {@code token3}:
+    /// <blockquote>token1 | token2 + token3</blockquote>
+    /// <h4>Escaping</h4>
+    /// <p>
+    /// An individual term may contain any possible character with certain characters
+    /// requiring escaping using a '{@code \}'.  The following characters will need to be escaped in
+    /// terms and phrases:
+    /// {@code + | " ( ) ' \}
+    /// <p>
+    /// The '{@code -}' operator is a special case.  On individual terms (not phrases) the first
+    /// character of a term that is {@code -} must be escaped; however, any '{@code -}' characters
+    /// beyond the first character do not need to be escaped.
+    /// For example:
+    /// <ul>
+    ///   <li>{@code -term1}   -- Specifies {@code NOT} operation against {@code term1}
+    ///   <li>{@code \-term1}  -- Searches for the term {@code -term1}.
+    ///   <li>{@code term-1}   -- Searches for the term {@code term-1}.
+    ///   <li>{@code term\-1}  -- Searches for the term {@code term-1}.
+    /// </ul>
+    /// <p>
+    /// The '{@code *}' operator is a special case. On individual terms (not phrases) the last
+    /// character of a term that is '{@code *}' must be escaped; however, any '{@code *}' characters
+    /// before the last character do not need to be escaped:
+    /// <ul>
+    ///   <li>{@code term1*}  --  Searches for the prefix {@code term1}
+    ///   <li>{@code term1\*} --  Searches for the term {@code term1*}
+    ///   <li>{@code term*1}  --  Searches for the term {@code term*1}
+    ///   <li>{@code term\*1} --  Searches for the term {@code term*1}
+    /// </ul>
+    /// <p>
+    /// Note that above examples consider the terms before text processing.
+    /// </summary>
+    public class SimpleQueryParser : QueryBuilder
+    {
+        /// <summary>Map of field names to query against; each value is the boost given to that field's query.</summary>
+        protected readonly IDictionary<string, float> weights;
+
+        // TODO: Make these into a [Flags] enum??
+        /// <summary>Bit mask built from the *_OPERATOR constants below; a set bit turns that syntax feature on.</summary>
+        protected readonly int flags;
+
+        /// <summary>Enables the <c>AND</c> operator (+)</summary>
+        public static readonly int AND_OPERATOR         = 1<<0;
+        /// <summary>Enables the <c>NOT</c> operator (-)</summary>
+        public static readonly int NOT_OPERATOR         = 1<<1;
+        /// <summary>Enables the <c>OR</c> operator (|)</summary>
+        public static readonly int OR_OPERATOR          = 1<<2;
+        /// <summary>Enables the <c>PREFIX</c> operator (*)</summary>
+        public static readonly int PREFIX_OPERATOR      = 1<<3;
+        /// <summary>Enables the <c>PHRASE</c> operator (")</summary>
+        public static readonly int PHRASE_OPERATOR      = 1<<4;
+        /// <summary>Enables the <c>PRECEDENCE</c> operators: <c>(</c> and <c>)</c></summary>
+        public static readonly int PRECEDENCE_OPERATORS = 1<<5;
+        /// <summary>Enables the <c>ESCAPE</c> operator (\)</summary>
+        public static readonly int ESCAPE_OPERATOR      = 1<<6;
+        /// <summary>Enables the <c>WHITESPACE</c> operators: ' ' '\n' '\r' '\t'</summary>
+        public static readonly int WHITESPACE_OPERATOR  = 1<<7;
+        /// <summary>Enables the <c>FUZZY</c> operator: (~) on single terms</summary>
+        public static readonly int FUZZY_OPERATOR       = 1<<8;
+        /// <summary>Enables the <c>NEAR</c> operator: (~) on phrases</summary>
+        public static readonly int NEAR_OPERATOR        = 1<<9;
+
+        private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD; // backing field of DefaultOperator
+
+        /// <summary>
+        /// Creates a new parser searching over a single field.
+        /// </summary>
+        /// <param name="analyzer">analyzer handed on to the <see cref="QueryBuilder"/> base constructor</param>
+        /// <param name="field">the only field to search; it is registered with a weight of 1.0</param>
+        public SimpleQueryParser(Analyzer analyzer, string field)
+            : this(analyzer, new HashMap<string, float>() { { field, 1.0F } })
+        {
+        }
+
+        /// <summary>
+        /// Creates a new parser searching over multiple fields with different weights.
+        /// </summary>
+        /// <param name="analyzer">analyzer handed on to the <see cref="QueryBuilder"/> base constructor</param>
+        /// <param name="weights">field name to boost map; flags default to -1, i.e. every operator bit set</param>
+        public SimpleQueryParser(Analyzer analyzer, IDictionary<string, float> weights)
+            : this(analyzer, weights, -1)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new parser with custom flags used to enable/disable certain features.
+        /// </summary>
+        /// <param name="analyzer">analyzer handed on to the <see cref="QueryBuilder"/> base constructor</param>
+        /// <param name="weights">field name to boost map; stored as given (not copied)</param>
+        /// <param name="flags">bitwise combination of the *_OPERATOR constants selecting the enabled syntax</param>
+        public SimpleQueryParser(Analyzer analyzer, IDictionary<string, float> weights, int flags)
+            : base(analyzer)
+        {
+            this.weights = weights;
+            this.flags = flags;
+        }
+
+        /// <summary>Parses the query text and returns the parsed query (or <c>null</c> if no query could be built).</summary>
+        /// <param name="queryText">the raw query string to parse; must not be <c>null</c></param>
+        /// <returns>the top of the query tree built from <paramref name="queryText"/>, or <c>null</c> if nothing was added to it</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="queryText"/> is <c>null</c></exception>
+        public Query Parse(string queryText)
+        {
+            if (queryText == null)
+            {
+                throw new ArgumentNullException("queryText");
+            }
+            State state = new State(queryText.ToCharArray(), new char[queryText.Length], 0, queryText.Length);
+            ParseSubQuery(state);
+            return state.Top;
+        }
+
+        private void ParseSubQuery(State state)
+        {
+            while (state.Index < state.Length) // dispatch on the current character until this state's range is used up
+            {
+                if (state.Data[state.Index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
+                {
+                    // the beginning of a subquery has been found
+                    ConsumeSubQuery(state);
+                }
+                else if (state.Data[state.Index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
+                {
+                    // a ')' seen at this level has no matching '(' so it is extraneous and skipped
+                    ++state.Index;
+                }
+                else if (state.Data[state.Index] == '"' && (flags & PHRASE_OPERATOR) != 0)
+                {
+                    // the beginning of a phrase has been found
+                    ConsumePhrase(state);
+                }
+                else if (state.Data[state.Index] == '+' && (flags & AND_OPERATOR) != 0)
+                {
+                    // an AND operation has been explicitly set
+                    // if an operation has already been set this one is ignored
+                    // if a term (or phrase or subquery) has not been found yet the
+                    // operation is also ignored since there is no previous
+                    // term (or phrase or subquery) to AND with
+                    if (!state.CurrentOperationIsSet && state.Top != null)
+                    {
+                        state.CurrentOperation = BooleanClause.Occur.MUST;
+                    }
+
+                    ++state.Index;
+                }
+                else if (state.Data[state.Index] == '|' && (flags & OR_OPERATOR) != 0)
+                {
+                    // an OR operation has been explicitly set
+                    // if an operation has already been set this one is ignored
+                    // if a term (or phrase or subquery) has not been found yet the
+                    // operation is also ignored since there is no previous
+                    // term (or phrase or subquery) to OR with
+                    if (!state.CurrentOperationIsSet && state.Top != null)
+                    {
+                        state.CurrentOperation = BooleanClause.Occur.SHOULD;
+                    }
+
+                    ++state.Index;
+                }
+                else if (state.Data[state.Index] == '-' && (flags & NOT_OPERATOR) != 0)
+                {
+                    // a NOT operator has been found, so increase the NOT count
+                    // two NOT operators in a row negate each other
+                    ++state.Not;
+                    ++state.Index;
+
+                    // continue so the NOT count is not reset
+                    // before the next character is determined
+                    continue;
+                }
+                else if ((state.Data[state.Index] == ' '
+                  || state.Data[state.Index] == '\t'
+                  || state.Data[state.Index] == '\n'
+                  || state.Data[state.Index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)
+                {
+                    // ignore any whitespace found as it may have already been
+                    // used as a delimiter across a term (or phrase or subquery)
+                    // or is simply extraneous
+                    ++state.Index;
+                }
+                else
+                {
+                    // the beginning of a token has been found
+                    ConsumeToken(state);
+                }
+
+                // reset the NOT count as even whitespace is not allowed when
+                // specifying the NOT operation for a term (or phrase or subquery)
+                state.Not = 0;
+            }
+        }
+
+        private void ConsumeSubQuery(State state)
+        {
+            Debug.Assert((flags & PRECEDENCE_OPERATORS) != 0);
+            int start = ++state.Index; // step past the opening '('; start is the first character of the subquery
+            int precedence = 1; // number of '(' still waiting for their matching ')'
+            bool escaped = false;
+
+            while (state.Index < state.Length)
+            {
+                if (!escaped)
+                {
+                    if (state.Data[state.Index] == '\\' && (flags & ESCAPE_OPERATOR) != 0)
+                    {
+                        // an escape character has been found so
+                        // whatever character is next will become
+                        // part of the subquery unless the escape
+                        // character is the last one in the data
+                        escaped = true;
+                        ++state.Index;
+
+                        continue;
+                    }
+                    else if (state.Data[state.Index] == '(')
+                    {
+                        // increase the precedence as there is a
+                        // subquery in the current subquery
+                        ++precedence;
+                    }
+                    else if (state.Data[state.Index] == ')')
+                    {
+                        --precedence;
+
+                        if (precedence == 0)
+                        {
+                            // this should be the end of the subquery
+                            // all characters found will be used for
+                            // creating the subquery
+                            break;
+                        }
+                    }
+                }
+
+                escaped = false;
+                ++state.Index;
+            }
+
+            if (state.Index == state.Length)
+            {
+                // a closing parenthesis was never found so the opening
+                // parenthesis is considered extraneous and will be ignored
+                state.Index = start;
+            }
+            else if (state.Index == start)
+            {
+                // a closing parenthesis was found immediately after the opening
+                // parenthesis so the current operation is reset since it would
+                // have been applied to this subquery
+                state.CurrentOperationIsSet = false;
+
+                ++state.Index;
+            }
+            else
+            {
+                // a complete subquery has been found and is recursively parsed with a new
+                // state object that shares Data/Buffer but only spans [start, state.Index)
+                State subState = new State(state.Data, state.Buffer, start, state.Index);
+                ParseSubQuery(subState);
+                BuildQueryTree(state, subState.Top);
+
+                ++state.Index; // step past the closing ')'
+            }
+        }
+
+        private void ConsumePhrase(State state)
+        {
+            Debug.Assert((flags & PHRASE_OPERATOR) != 0);
+            int start = ++state.Index; // step past the opening double quote
+            int copied = 0; // number of phrase characters written to state.Buffer so far
+            bool escaped = false;
+            bool hasSlop = false;
+
+            while (state.Index < state.Length)
+            {
+                if (!escaped)
+                {
+                    if (state.Data[state.Index] == '\\' && (flags & ESCAPE_OPERATOR) != 0)
+                    {
+                        // an escape character has been found so
+                        // whatever character is next will become
+                        // part of the phrase unless the escape
+                        // character is the last one in the data
+                        escaped = true;
+                        ++state.Index;
+
+                        continue;
+                    }
+                    else if (state.Data[state.Index] == '"')
+                    {
+                        // if there are still characters after the closing ", check for a
+                        // tilde
+                        if (state.Length > (state.Index + 1) &&
+                            state.Data[state.Index + 1] == '~' &&
+                            (flags & NEAR_OPERATOR) != 0)
+                        {
+                            state.Index++; // move onto the '~', which is where ParseFuzziness expects to start
+                            // check for characters after the tilde
+                            if (state.Length > (state.Index + 1))
+                            {
+                                hasSlop = true;
+                            }
+                            break;
+                        }
+                        else
+                        {
+                            // this should be the end of the phrase
+                            // all characters found will be used for
+                            // creating the phrase query
+                            break;
+                        }
+                    }
+                }
+
+                escaped = false;
+                state.Buffer[copied++] = state.Data[state.Index++];
+            }
+
+            if (state.Index == state.Length)
+            {
+                // a closing double quote was never found so the opening
+                // double quote is considered extraneous and will be ignored
+                state.Index = start;
+            }
+            else if (state.Index == start)
+            {
+                // a closing double quote was found immediately after the opening
+                // double quote so the current operation is reset since it would
+                // have been applied to this phrase
+                state.CurrentOperationIsSet = false;
+
+                ++state.Index;
+            }
+            else
+            {
+                // a complete phrase has been found and is parsed through
+                // the analyzer from the given field
+                string phrase = new string(state.Buffer, 0, copied);
+                Query branch;
+                if (hasSlop)
+                {
+                    branch = NewPhraseQuery(phrase, ParseFuzziness(state));
+                }
+                else
+                {
+                    branch = NewPhraseQuery(phrase, 0);
+                }
+                BuildQueryTree(state, branch);
+
+                ++state.Index; // steps past the closing quote, a bare trailing '~', or the character that ended the slop number
+            }
+        }
+
+        private void ConsumeToken(State state)
+        {
+            int copied = 0; // number of term characters written to state.Buffer so far
+            bool escaped = false;
+            bool prefix = false;
+            bool fuzzy = false;
+
+            while (state.Index < state.Length)
+            {
+                if (!escaped)
+                {
+                    if (state.Data[state.Index] == '\\' && (flags & ESCAPE_OPERATOR) != 0)
+                    {
+                        // an escape character has been found so
+                        // whatever character is next will become
+                        // part of the term unless the escape
+                        // character is the last one in the data
+                        escaped = true;
+                        prefix = false;
+                        ++state.Index;
+
+                        continue;
+                    }
+                    else if (TokenFinished(state))
+                    {
+                        // this should be the end of the term
+                        // all characters found will be used for
+                        // creating the term query
+                        break;
+                    }
+                    else if (copied > 0 && state.Data[state.Index] == '~' && (flags & FUZZY_OPERATOR) != 0)
+                    {
+                        fuzzy = true; // Index stays on the '~' so ParseFuzziness can read the distance
+                        break;
+                    }
+
+                    // prefix tracks whether or not the last character
+                    // was a '*' operator that hasn't been escaped
+                    // there must be at least one valid character before
+                    // searching for a prefixed set of terms
+                    prefix = copied > 0 && state.Data[state.Index] == '*' && (flags & PREFIX_OPERATOR) != 0;
+                }
+
+                escaped = false;
+                state.Buffer[copied++] = state.Data[state.Index++];
+            }
+
+            if (copied > 0)
+            {
+                Query branch;
+
+                if (fuzzy && (flags & FUZZY_OPERATOR) != 0)
+                {
+                    string token = new string(state.Buffer, 0, copied);
+                    int fuzziness = ParseFuzziness(state);
+                    // edit distance has a maximum, limit to the maximum supported
+                    fuzziness = Math.Min(fuzziness, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
+                    if (fuzziness == 0)
+                    {
+                        branch = NewDefaultQuery(token); // a distance of 0 (or an unparsable one) falls back to a plain term query
+                    }
+                    else
+                    {
+                        branch = NewFuzzyQuery(token, fuzziness);
+                    }
+                }
+                else if (prefix)
+                {
+                    // if a term is found with a closing '*' it is considered to be a prefix query
+                    // and will have prefix added as an option
+                    string token = new string(state.Buffer, 0, copied - 1); // copied - 1 drops the trailing '*'
+                    branch = NewPrefixQuery(token);
+                }
+                else
+                {
+                    // a standard term has been found so it will be run through
+                    // the entire analysis chain from the specified schema field
+                    string token = new string(state.Buffer, 0, copied);
+                    branch = NewDefaultQuery(token);
+                }
+
+                BuildQueryTree(state, branch);
+            }
+        }
+
+        /// <summary>
+        /// Called after a term, phrase, or subquery has been consumed in order to
+        /// attach the resulting query to the existing query tree held in the state.
+        /// Nothing is added, and no state is changed, when <paramref name="branch"/> is <c>null</c>.
+        /// </summary>
+        /// <param name="state">parse state holding the tree (Top), the pending operation and the NOT count</param>
+        /// <param name="branch">query built for the consumed token, or <c>null</c> if none was produced</param>
+        private void BuildQueryTree(State state, Query branch)
+        {
+            if (branch != null)
+            {
+                // modify our branch to a BooleanQuery wrapper for not
+                // this is necessary any time a term, phrase, or subquery is negated
+                if (state.Not % 2 == 1)
+                {
+                    BooleanQuery nq = new BooleanQuery();
+                    nq.Add(branch, BooleanClause.Occur.MUST_NOT);
+                    nq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+                    branch = nq;
+                }
+
+                // first term (or phrase or subquery) found and will begin our query tree
+                if (state.Top == null)
+                {
+                    state.Top = branch;
+                }
+                else
+                {
+                    // more than one term (or phrase or subquery) found
+                    // set currentOperation to the default if no other operation is explicitly set
+                    if (!state.CurrentOperationIsSet)
+                    {
+                        state.CurrentOperation = defaultOperator;
+                    }
+
+                    // operational change requiring a new parent node
+                    // this occurs if the previous operation is not the same as current operation
+                    // because the previous operation must be evaluated separately to preserve
+                    // the proper precedence and the current operation will take over as the top of the tree
+                    if (!state.PreviousOperationIsSet || state.PreviousOperation != state.CurrentOperation)
+                    {
+                        BooleanQuery bq = new BooleanQuery();
+                        bq.Add(state.Top, state.CurrentOperation);
+                        state.Top = bq;
+                    }
+
+                    // add the new branch under the (possibly new) parent and remember the operation used
+                    ((BooleanQuery)state.Top).Add(branch, state.CurrentOperation);
+                    state.PreviousOperation = state.CurrentOperation;
+                }
+
+                // reset the current operation as it was intended to be applied to
+                // the incoming term (or phrase or subquery); note that a null branch
+                // never reaches this point, so in that case the operation stays pending
+                state.CurrentOperationIsSet = false;
+            }
+        }
+
+        /// <summary>
+        /// Helper parsing fuzziness (the number written after a '~') from the parsing state.
+        /// The same value is used as phrase slop and as fuzzy edit distance by the callers.
+        /// </summary>
+        /// <param name="state">parse state whose Index is on the '~'; Index is advanced to the end of the number</param>
+        /// <returns>slop/edit distance, 0 in the case of non-parsing slop/edit string</returns>
+        private int ParseFuzziness(State state)
+        {
+            if (state.Data[state.Index] != '~')
+            {
+                // not positioned on a fuzzy/near operator: nothing is consumed
+                return 0;
+            }
+
+            // the number runs from just after the '~' up to the next delimiter
+            // or the end of the data, whichever comes first
+            int start = ++state.Index;
+            while (state.Index < state.Length && !TokenFinished(state))
+            {
+                ++state.Index;
+            }
+
+            // slice the characters straight out of the query data instead of copying
+            // them into a scratch array sized to the whole query
+            string slopText = new string(state.Data, start, state.Index - start);
+
+            // parse with the invariant culture so the result never depends on the
+            // culture of the calling thread; TryParse leaves 0 in fuzziness on failure
+            int fuzziness;
+            int.TryParse(
+                slopText,
+                System.Globalization.NumberStyles.Integer,
+                System.Globalization.CultureInfo.InvariantCulture,
+                out fuzziness);
+
+            // negative -> 0
+            return Math.Max(fuzziness, 0);
+        }
+
+        /// <summary>
+        /// Tells whether the character at the current index ends a token, i.e. is an enabled delimiter.
+        /// </summary>
+        /// <param name="state">parse state; only Data[Index] is inspected and nothing is modified</param>
+        /// <returns><c>true</c> if the character is a delimiter whose operator flag is set, otherwise <c>false</c></returns>
+        private bool TokenFinished(State state)
+        {
+            switch (state.Data[state.Index])
+            {
+                case '"': return (flags & PHRASE_OPERATOR) != 0;
+                case '|': return (flags & OR_OPERATOR) != 0;
+                case '+': return (flags & AND_OPERATOR) != 0;
+                case '(':
+                case ')': return (flags & PRECEDENCE_OPERATORS) != 0;
+                case ' ':
+                case '\t':
+                case '\n':
+                case '\r': return (flags & WHITESPACE_OPERATOR) != 0;
+                default: return false;
+            }
+        }
+
+        /// <summary>Factory method for a standard query (no phrase or prefix operators): one boosted query per field.</summary>
+        /// <param name="text">token text passed to <c>CreateBooleanQuery</c> for every configured field</param>
+        /// <returns>the per-field queries combined and passed through <c>Simplify</c>; <c>null</c> if no field produced one</returns>
+        protected virtual Query NewDefaultQuery(string text)
+        {
+            BooleanQuery combined = new BooleanQuery(true);
+            foreach (KeyValuePair<string, float> fieldWeight in weights)
+            {
+                Query fieldQuery = CreateBooleanQuery(fieldWeight.Key, text, defaultOperator);
+                if (fieldQuery == null)
+                {
+                    continue;
+                }
+                fieldQuery.Boost = fieldWeight.Value;
+                combined.Add(fieldQuery, BooleanClause.Occur.SHOULD);
+            }
+
+            return Simplify(combined);
+        }
+
+        /// <summary>
+        /// Factory method to generate a fuzzy query: one boosted <c>FuzzyQuery</c> per configured field.
+        /// </summary>
+        /// <param name="text">term text to match approximately</param>
+        /// <param name="fuzziness">value passed as the second argument of the <c>FuzzyQuery</c> constructor</param>
+        /// <returns>the per-field fuzzy queries combined and passed through <c>Simplify</c></returns>
+        protected virtual Query NewFuzzyQuery(string text, int fuzziness)
+        {
+            BooleanQuery bq = new BooleanQuery(true);
+            foreach (var entry in weights)
+            {
+                // the query is constructed directly, so it can never be null and
+                // every configured field contributes exactly one clause
+                Query q = new FuzzyQuery(new Term(entry.Key, text), fuzziness);
+                q.Boost = entry.Value;
+                bq.Add(q, BooleanClause.Occur.SHOULD);
+            }
+
+            return Simplify(bq);
+        }
+
+        /// <summary>Factory method to generate a phrase query with slop: one boosted query per configured field.</summary>
+        /// <param name="text">phrase text passed to <c>CreatePhraseQuery</c> for every configured field</param>
+        /// <param name="slop">slop value passed unchanged to <c>CreatePhraseQuery</c></param>
+        /// <returns>the per-field queries combined and passed through <c>Simplify</c>; <c>null</c> if no field produced one</returns>
+        protected virtual Query NewPhraseQuery(string text, int slop)
+        {
+            BooleanQuery combined = new BooleanQuery(true);
+            foreach (KeyValuePair<string, float> fieldWeight in weights)
+            {
+                Query fieldQuery = CreatePhraseQuery(fieldWeight.Key, text, slop);
+                if (fieldQuery == null)
+                {
+                    continue;
+                }
+                fieldQuery.Boost = fieldWeight.Value;
+                combined.Add(fieldQuery, BooleanClause.Occur.SHOULD);
+            }
+
+            return Simplify(combined);
+        }
+
+        /// <summary>
+        /// Factory method to generate a prefix query: one boosted <c>PrefixQuery</c> per configured field.
+        /// </summary>
+        /// <param name="text">prefix text; it is used as given to build the <c>Term</c> of each field</param>
+        /// <returns>the per-field prefix queries combined and passed through <c>Simplify</c></returns>
+        protected virtual Query NewPrefixQuery(string text)
+        {
+            BooleanQuery combined = new BooleanQuery(true);
+            foreach (KeyValuePair<string, float> fieldWeight in weights)
+            {
+                Term prefixTerm = new Term(fieldWeight.Key, text);
+                PrefixQuery fieldQuery = new PrefixQuery(prefixTerm) { Boost = fieldWeight.Value };
+                combined.Add(fieldQuery, BooleanClause.Occur.SHOULD);
+            }
+            return Simplify(combined);
+        }
+
+        /// <summary>
+        /// Helper that collapses boolean queries holding 0 or 1 clause.
+        /// </summary>
+        /// <param name="bq">the boolean query to inspect; it is not modified</param>
+        /// <returns><c>null</c> for no clauses, the inner query for a single clause, otherwise <paramref name="bq"/> itself</returns>
+        protected virtual Query Simplify(BooleanQuery bq)
+        {
+            var clauses = bq.Clauses;
+            switch (clauses.Length)
+            {
+                case 0:
+                    // an empty wrapper carries no query at all
+                    return null;
+                case 1:
+                    // a single clause does not need the wrapper around it
+                    return clauses[0].Query;
+                default:
+                    return bq;
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the implicit operator used between tokens when none is given explicitly,
+        /// which will be either <c>SHOULD</c> or <c>MUST</c>. The initial value is <c>SHOULD</c>.
+        /// </summary>
+        public virtual BooleanClause.Occur DefaultOperator
+        {
+            get { return defaultOperator; }
+            set { defaultOperator = value; }
+        }
+
+
+        public class State
+        {
+            // Mutable cursor over the query characters plus the query tree built so far.
+            // A nested subquery is parsed with its own State that shares Data and Buffer
+            // with the enclosing one but only spans the text between the parentheses.
+            // Data, Buffer, Index and Length are exposed as auto-properties further down.
+
+            private BooleanClause.Occur currentOperation;
+            private BooleanClause.Occur previousOperation;
+            // Not is exposed as an auto-property further down.
+
+            // Top is exposed as an auto-property further down.
+
+            internal State(char[] data, char[] buffer, int index, int length)
+            {
+                this.Data = data;
+                this.Buffer = buffer;
+                this.Index = index;
+                this.Length = length;
+            }
+
+            public char[] Data { get; protected set; } // the characters in the query string
+            public char[] Buffer { get; protected set; } // a temporary buffer used to reduce necessary allocations
+            public int Index { get; set; } // position in Data of the character currently being examined
+            public int Length { get; protected set; } // exclusive upper bound for Index in this state
+
+            public BooleanClause.Occur CurrentOperation 
+            {
+                get 
+                { 
+                    return currentOperation; 
+                }
+                set
+                {
+                    currentOperation = value;
+                    CurrentOperationIsSet = true; // assigning a value also marks the operation as explicitly set
+                }
+            }
+
+            public BooleanClause.Occur PreviousOperation
+            {
+                get
+                {
+                    return previousOperation;
+                }
+                set
+                {
+                    previousOperation = value;
+                    PreviousOperationIsSet = true; // assigning a value also marks the operation as set
+                }
+            }
+
+            public bool CurrentOperationIsSet { get; set; } // false until CurrentOperation is assigned, or after a reset
+            public bool PreviousOperationIsSet { get; set; } // false until PreviousOperation is assigned
+
+            public int Not { get; set; } // number of consecutive '-' operators seen before the next token
+            public Query Top { get; set; } // root of the query tree built so far; null until a branch is added
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6224f3e2/Lucene.Net.Tests.QueryParser/Lucene.Net.Tests.QueryParser.csproj
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.QueryParser/Lucene.Net.Tests.QueryParser.csproj b/Lucene.Net.Tests.QueryParser/Lucene.Net.Tests.QueryParser.csproj
index adab182..bcf9568 100644
--- a/Lucene.Net.Tests.QueryParser/Lucene.Net.Tests.QueryParser.csproj
+++ b/Lucene.Net.Tests.QueryParser/Lucene.Net.Tests.QueryParser.csproj
@@ -53,6 +53,7 @@
     <Compile Include="Ext\TestExtensions.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Classic\TestMultiAnalyzer.cs" />
+    <Compile Include="Simple\TestSimpleQueryParser.cs" />
     <Compile Include="Util\QueryParserTestBase.cs" />
   </ItemGroup>
   <ItemGroup>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6224f3e2/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs b/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
new file mode 100644
index 0000000..0a9d49f
--- /dev/null
+++ b/Lucene.Net.Tests.QueryParser/Simple/TestSimpleQueryParser.cs
@@ -0,0 +1,728 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using NUnit.Framework;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.QueryParser.Simple
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests for <see cref="SimpleQueryParser"/>
+    /// </summary>
+    [TestFixture]
+    public class TestSimpleQueryParser : LuceneTestCase
+    {
+        /// <summary>
+        /// Parses <paramref name="text"/> against the single field "field" with a
+        /// whitespace+lowercase <see cref="MockAnalyzer"/> and a default operator of MUST.
+        /// </summary>
+        /// <param name="text">the query string to parse</param>
+        /// <returns>whatever the parser produces for <paramref name="text"/></returns>
+        private Query Parse(string text)
+        {
+            var parser = new SimpleQueryParser(new MockAnalyzer(Random()), "field")
+            {
+                DefaultOperator = BooleanClause.Occur.MUST
+            };
+            return parser.Parse(text);
+        }
+
+        /// <summary>
+        /// Same as the single-argument helper, but hands <paramref name="flags"/> to the
+        /// parser constructor; "field" is registered with a weight of 1 and the default
+        /// operator is MUST.
+        /// </summary>
+        /// <param name="text">the query string to parse</param>
+        /// <param name="flags">flags value forwarded to <see cref="SimpleQueryParser"/></param>
+        /// <returns>whatever the parser produces for <paramref name="text"/></returns>
+        private Query Parse(string text, int flags)
+        {
+            var weights = new HashMap<string, float>() { { "field", 1f } };
+            var parser = new SimpleQueryParser(new MockAnalyzer(Random()), weights, flags)
+            {
+                DefaultOperator = BooleanClause.Occur.MUST
+            };
+            return parser.Parse(text);
+        }
+
+        /// <summary>A bare word becomes a single term query on "field".</summary>
+        [Test]
+        public void TestTerm()
+        {
+            var term = new Term("field", "foobar");
+            Query expected = new TermQuery(term);
+            Query actual = Parse("foobar");
+
+            assertEquals(expected, actual);
+        }
+
+        /// <summary>
+        /// A numeric suffix after '~' yields a fuzzy query; a missing or non-numeric
+        /// suffix falls back to a plain term.
+        /// </summary>
+        [Test]
+        public void TestFuzzy()
+        {
+            Query plainTerm = new TermQuery(new Term("field", "foobar"));
+            Query fuzzyTerm = new FuzzyQuery(new Term("field", "foobar"), 2);
+
+            assertEquals(fuzzyTerm, Parse("foobar~2"));
+            foreach (string notFuzzy in new[] { "foobar~", "foobar~a", "foobar~1a" })
+            {
+                assertEquals(plainTerm, Parse(notFuzzy));
+            }
+
+            int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
+            var clamped = new BooleanQuery();
+            clamped.Add(new FuzzyQuery(new Term("field", "foo"), maxEdits), BooleanClause.Occur.MUST);
+            clamped.Add(new TermQuery(new Term("field", "bar")), BooleanClause.Occur.MUST);
+
+            // The trailing 1 is concatenated as text, so the requested distance is
+            // larger than the maximum; the expectation is the maximum itself.
+            assertEquals(clamped, Parse("foo~" + maxEdits + 1 + " bar"));
+        }
+
+        /// <summary>Two quoted words become a two-term phrase query.</summary>
+        [Test]
+        public void TestPhrase()
+        {
+            PhraseQuery phrase = new PhraseQuery();
+            foreach (string word in new[] { "foo", "bar" })
+            {
+                phrase.Add(new Term("field", word));
+            }
+
+            assertEquals(phrase, Parse("\"foo bar\""));
+        }
+
+        /// <summary>
+        /// Phrase followed by '~': valid numbers become the slop, anything else is ignored.
+        /// </summary>
+        [Test]
+        public void TestPhraseWithSlop()
+        {
+            PhraseQuery slopTwo = new PhraseQuery();
+            slopTwo.Add(new Term("field", "foo"));
+            slopTwo.Add(new Term("field", "bar"));
+            slopTwo.Slop = 2;
+            assertEquals(slopTwo, Parse("\"foo bar\"~2"));
+
+            PhraseQuery slopTen = new PhraseQuery();
+            slopTen.Add(new Term("field", "foo"));
+            slopTen.Add(new Term("field", "bar"));
+            slopTen.Slop = 10;
+            assertEquals(slopTen, Parse("\"foo bar\"~10"));
+
+            PhraseQuery exact = new PhraseQuery();
+            exact.Add(new Term("field", "foo"));
+            exact.Add(new Term("field", "bar"));
+            assertEquals("Ignore trailing tilde with no slop", exact, Parse("\"foo bar\"~"));
+            foreach (string nonNumeric in new[] { "\"foo bar\"~a", "\"foo bar\"~1a" })
+            {
+                assertEquals("Ignore non-numeric trailing slop", exact, Parse(nonNumeric));
+            }
+            assertEquals("Ignore negative trailing slop", exact, Parse("\"foo bar\"~-1"));
+
+            // a slop'd phrase followed by another term
+            PhraseQuery slopTwelve = new PhraseQuery();
+            slopTwelve.Add(new Term("field", "foo"));
+            slopTwelve.Add(new Term("field", "bar"));
+            slopTwelve.Slop = 12;
+
+            BooleanQuery phraseAndTerm = new BooleanQuery();
+            phraseAndTerm.Add(slopTwelve, BooleanClause.Occur.MUST);
+            phraseAndTerm.Add(new TermQuery(new Term("field", "baz")), BooleanClause.Occur.MUST);
+            assertEquals(phraseAndTerm, Parse("\"foo bar\"~12 baz"));
+        }
+
+        /// <summary>A trailing asterisk turns the word into a prefix query.</summary>
+        [Test]
+        public void TestPrefix()
+        {
+            var prefixTerm = new Term("field", "foobar");
+            PrefixQuery expected = new PrefixQuery(prefixTerm);
+            Query actual = Parse("foobar*");
+
+            assertEquals(expected, actual);
+        }
+
+        /// <summary>Terms joined by '+' are both required.</summary>
+        [Test]
+        public void TestAND()
+        {
+            var must = BooleanClause.Occur.MUST;
+            BooleanQuery both = new BooleanQuery();
+            foreach (string word in new[] { "foo", "bar" })
+            {
+                both.Add(new TermQuery(new Term("field", word)), must);
+            }
+
+            assertEquals(both, Parse("foo+bar"));
+        }
+
+        /// <summary>Phrases joined by '+' are both required.</summary>
+        [Test]
+        public void TestANDPhrase()
+        {
+            PhraseQuery fooBar = new PhraseQuery();
+            fooBar.Add(new Term("field", "foo"));
+            fooBar.Add(new Term("field", "bar"));
+
+            PhraseQuery starWars = new PhraseQuery();
+            starWars.Add(new Term("field", "star"));
+            starWars.Add(new Term("field", "wars"));
+
+            var must = BooleanClause.Occur.MUST;
+            BooleanQuery both = new BooleanQuery();
+            both.Add(fooBar, must);
+            both.Add(starWars, must);
+
+            assertEquals(both, Parse("\"foo bar\"+\"star wars\""));
+        }
+
+        /// <summary>
+        /// With the helper's default operator of MUST, whitespace-separated terms are
+        /// both required.
+        /// </summary>
+        [Test]
+        public void TestANDImplicit()
+        {
+            var must = BooleanClause.Occur.MUST;
+            BooleanQuery both = new BooleanQuery();
+            foreach (string word in new[] { "foo", "bar" })
+            {
+                both.Add(new TermQuery(new Term("field", word)), must);
+            }
+
+            assertEquals(both, Parse("foo bar"));
+        }
+
+        /// <summary>Terms joined by one or more '|' are optional alternatives.</summary>
+        [Test]
+        public void TestOR()
+        {
+            var should = BooleanClause.Occur.SHOULD;
+            BooleanQuery either = new BooleanQuery();
+            either.Add(new TermQuery(new Term("field", "foo")), should);
+            either.Add(new TermQuery(new Term("field", "bar")), should);
+
+            foreach (string input in new[] { "foo|bar", "foo||bar" })
+            {
+                assertEquals(either, Parse(input));
+            }
+        }
+
+        /// <summary>
+        /// A parser whose default operator is left untouched treats whitespace-separated
+        /// terms as optional alternatives.
+        /// </summary>
+        [Test]
+        public void TestORImplicit()
+        {
+            var should = BooleanClause.Occur.SHOULD;
+            BooleanQuery either = new BooleanQuery();
+            either.Add(new TermQuery(new Term("field", "foo")), should);
+            either.Add(new TermQuery(new Term("field", "bar")), should);
+
+            // deliberately not using the Parse helper: it forces MUST
+            Analyzer analyzer = new MockAnalyzer(Random());
+            SimpleQueryParser defaultParser = new SimpleQueryParser(analyzer, "field");
+            assertEquals(either, defaultParser.Parse("foo bar"));
+        }
+
+        /// <summary>Phrases joined by '|' are optional alternatives.</summary>
+        [Test]
+        public void TestORPhrase()
+        {
+            PhraseQuery fooBar = new PhraseQuery();
+            fooBar.Add(new Term("field", "foo"));
+            fooBar.Add(new Term("field", "bar"));
+
+            PhraseQuery starWars = new PhraseQuery();
+            starWars.Add(new Term("field", "star"));
+            starWars.Add(new Term("field", "wars"));
+
+            var should = BooleanClause.Occur.SHOULD;
+            BooleanQuery either = new BooleanQuery();
+            either.Add(fooBar, should);
+            either.Add(starWars, should);
+
+            assertEquals(either, Parse("\"foo bar\"|\"star wars\""));
+        }
+
+        /// <summary>
+        /// A negated term is expected as MUST_NOT on the term plus a SHOULD match-all clause.
+        /// </summary>
+        [Test]
+        public void TestNOT()
+        {
+            BooleanQuery notFoo = new BooleanQuery();
+            notFoo.Add(new TermQuery(new Term("field", "foo")), BooleanClause.Occur.MUST_NOT);
+            notFoo.Add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+
+            foreach (string input in new[] { "-foo", "-(foo)", "---foo" })
+            {
+                assertEquals(notFoo, Parse(input));
+            }
+        }
+
+        /// <summary>Only the trailing asterisk acts as the prefix operator.</summary>
+        [Test]
+        public void TestCrazyPrefixes1()
+        {
+            var prefixTerm = new Term("field", "st*ar");
+            Query expected = new PrefixQuery(prefixTerm);
+            Query actual = Parse("st*ar*");
+
+            assertEquals(expected, actual);
+        }
+
+        /// <summary>Prefix query whose text contains an escaped backslash.</summary>
+        [Test]
+        public void TestCrazyPrefixes2()
+        {
+            var prefixTerm = new Term("field", "st*ar\\*");
+            Query expected = new PrefixQuery(prefixTerm);
+            Query actual = Parse("st*ar\\\\**");
+
+            assertEquals(expected, actual);
+        }
+
+        /// <summary>
+        /// The final asterisk is escaped, so the result is a plain term, not a prefix query.
+        /// </summary>
+        [Test]
+        public void TestTermInDisguise()
+        {
+            var term = new Term("field", "st*ar\\*");
+            Query expected = new TermQuery(term);
+            Query actual = Parse("sT*Ar\\\\\\*");
+
+            assertEquals(expected, actual);
+        }
+
+        // a number of test cases here have garbage/errors in
+        // the syntax passed in to test that the query can
+        // still be interpreted as a guess to what the human
+        // input was trying to be
+
+        /// <summary>Stray whitespace and dangling operators around one word still yield that term.</summary>
+        [Test]
+        public void TestGarbageTerm()
+        {
+            Query star = new TermQuery(new Term("field", "star"));
+
+            string[] inputs =
+            {
+                "star",
+                "star\n",
+                "star\r",
+                "star\t",
+                "star(",
+                "star)",
+                "star\"",
+                "\t \r\n\nstar   \n \r \t ",
+                "- + \"\" - star \\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(star, Parse(input));
+            }
+        }
+
+        /// <summary>Inputs that contain no actual term are expected to parse to null.</summary>
+        [Test]
+        public void TestGarbageEmpty()
+        {
+            string[] inputs =
+            {
+                "",
+                "  ",
+                "  ",
+                "\\ ",
+                "\\ \\ ",
+                "\"\"",
+                "\" \"",
+                "\" \"|\" \"",
+                "(\" \"|\" \")",
+                "\" \" \" \"",
+                "(\" \" \" \")"
+            };
+            foreach (string input in inputs)
+            {
+                assertNull(Parse(input));
+            }
+        }
+
+        /// <summary>Messy spellings of "star AND wars" all give the same two MUST clauses.</summary>
+        [Test]
+        public void TestGarbageAND()
+        {
+            var must = BooleanClause.Occur.MUST;
+            BooleanQuery starAndWars = new BooleanQuery();
+            starAndWars.Add(new TermQuery(new Term("field", "star")), must);
+            starAndWars.Add(new TermQuery(new Term("field", "wars")), must);
+
+            string[] inputs =
+            {
+                "star wars",
+                "star+wars",
+                "     star     wars   ",
+                "     star +    wars   ",
+                "  |     star + + |   wars   ",
+                "  |     star + + |   wars   \\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(starAndWars, Parse(input));
+            }
+        }
+
+        /// <summary>Messy spellings of "star OR wars" all give the same two SHOULD clauses.</summary>
+        [Test]
+        public void TestGarbageOR()
+        {
+            var should = BooleanClause.Occur.SHOULD;
+            BooleanQuery starOrWars = new BooleanQuery();
+            starOrWars.Add(new TermQuery(new Term("field", "star")), should);
+            starOrWars.Add(new TermQuery(new Term("field", "wars")), should);
+
+            string[] inputs =
+            {
+                "star|wars",
+                "     star |    wars   ",
+                "  |     star | + |   wars   ",
+                "  +     star | + +   wars   \\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(starOrWars, Parse(input));
+            }
+        }
+
+        /// <summary>Repeated or dangling '-' characters still yield a single negation.</summary>
+        [Test]
+        public void TestGarbageNOT()
+        {
+            BooleanQuery notStar = new BooleanQuery();
+            notStar.Add(new TermQuery(new Term("field", "star")), BooleanClause.Occur.MUST_NOT);
+            notStar.Add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+
+            foreach (string input in new[] { "-star", "---star", "- -star -" })
+            {
+                assertEquals(notStar, Parse(input));
+            }
+        }
+
+        /// <summary>Empty phrases and stray escapes around a real phrase are ignored.</summary>
+        [Test]
+        public void TestGarbagePhrase()
+        {
+            PhraseQuery starWars = new PhraseQuery();
+            starWars.Add(new Term("field", "star"));
+            starWars.Add(new Term("field", "wars"));
+
+            string[] inputs =
+            {
+                "\"star wars\"",
+                "\"star wars\\ \"",
+                "\"\" | \"star wars\"",
+                "          \"star wars\"        \"\"\\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(starWars, Parse(input));
+            }
+        }
+
+        /// <summary>Unbalanced or empty parentheses around one word still yield that term.</summary>
+        [Test]
+        public void TestGarbageSubquery()
+        {
+            Query star = new TermQuery(new Term("field", "star"));
+
+            string[] inputs =
+            {
+                "(star)",
+                "(star))",
+                "((star)",
+                "     -()(star)        \n\n\r     ",
+                "| + - ( + - |      star    \n      ) \n"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(star, Parse(input));
+            }
+        }
+
+        /// <summary>Three AND'd terms end up as three MUST clauses of one boolean query.</summary>
+        [Test]
+        public void TestCompoundAnd()
+        {
+            var must = BooleanClause.Occur.MUST;
+            BooleanQuery all = new BooleanQuery();
+            foreach (string word in new[] { "star", "wars", "empire" })
+            {
+                all.Add(new TermQuery(new Term("field", word)), must);
+            }
+
+            string[] inputs =
+            {
+                "star wars empire",
+                "star+wars + empire",
+                " | --star wars empire \n\\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(all, Parse(input));
+            }
+        }
+
+        /// <summary>Three OR'd terms end up as three SHOULD clauses of one boolean query.</summary>
+        [Test]
+        public void TestCompoundOr()
+        {
+            var should = BooleanClause.Occur.SHOULD;
+            BooleanQuery any = new BooleanQuery();
+            foreach (string word in new[] { "star", "wars", "empire" })
+            {
+                any.Add(new TermQuery(new Term("field", word)), should);
+            }
+
+            string[] inputs =
+            {
+                "star|wars|empire",
+                "star|wars | empire",
+                " | --star|wars|empire \n\\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(any, Parse(input));
+            }
+        }
+
+        /// <summary>Expected shape: (star OR wars) AND empire.</summary>
+        [Test]
+        public void TestComplex00()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery starOrWars = new BooleanQuery();
+            starOrWars.Add(new TermQuery(new Term("field", "star")), should);
+            starOrWars.Add(new TermQuery(new Term("field", "wars")), should);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(starOrWars, must);
+            expected.Add(new TermQuery(new Term("field", "empire")), must);
+
+            string[] inputs =
+            {
+                "star|wars empire",
+                "star|wars + empire",
+                "star| + wars + ----empire |"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>Expected shape: (star AND wars) OR empire.</summary>
+        [Test]
+        public void TestComplex01()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery starAndWars = new BooleanQuery();
+            starAndWars.Add(new TermQuery(new Term("field", "star")), must);
+            starAndWars.Add(new TermQuery(new Term("field", "wars")), must);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(starAndWars, should);
+            expected.Add(new TermQuery(new Term("field", "empire")), should);
+
+            string[] inputs =
+            {
+                "star wars | empire",
+                "star + wars|empire",
+                "star + | wars | ----empire +"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>Expected shape: (star AND wars) OR empire OR strikes.</summary>
+        [Test]
+        public void TestComplex02()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery starAndWars = new BooleanQuery();
+            starAndWars.Add(new TermQuery(new Term("field", "star")), must);
+            starAndWars.Add(new TermQuery(new Term("field", "wars")), must);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(starAndWars, should);
+            expected.Add(new TermQuery(new Term("field", "empire")), should);
+            expected.Add(new TermQuery(new Term("field", "strikes")), should);
+
+            string[] inputs =
+            {
+                "star wars | empire | strikes",
+                "star + wars|empire | strikes",
+                "star + | wars | ----empire | + --strikes \\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>Expected shape: ((star AND wars) OR empire OR strikes) AND back.</summary>
+        [Test]
+        public void TestComplex03()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery starAndWars = new BooleanQuery();
+            starAndWars.Add(new TermQuery(new Term("field", "star")), must);
+            starAndWars.Add(new TermQuery(new Term("field", "wars")), must);
+
+            BooleanQuery alternatives = new BooleanQuery();
+            alternatives.Add(starAndWars, should);
+            alternatives.Add(new TermQuery(new Term("field", "empire")), should);
+            alternatives.Add(new TermQuery(new Term("field", "strikes")), should);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(alternatives, must);
+            expected.Add(new TermQuery(new Term("field", "back")), must);
+
+            string[] inputs =
+            {
+                "star wars | empire | strikes back",
+                "star + wars|empire | strikes + back",
+                "star + | wars | ----empire | + --strikes + | --back \\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>Expected shape: (star AND wars) OR empire OR (strikes AND back).</summary>
+        [Test]
+        public void TestComplex04()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery starAndWars = new BooleanQuery();
+            starAndWars.Add(new TermQuery(new Term("field", "star")), must);
+            starAndWars.Add(new TermQuery(new Term("field", "wars")), must);
+
+            BooleanQuery strikesAndBack = new BooleanQuery();
+            strikesAndBack.Add(new TermQuery(new Term("field", "strikes")), must);
+            strikesAndBack.Add(new TermQuery(new Term("field", "back")), must);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(starAndWars, should);
+            expected.Add(new TermQuery(new Term("field", "empire")), should);
+            expected.Add(strikesAndBack, should);
+
+            string[] inputs =
+            {
+                "(star wars) | empire | (strikes back)",
+                "(star + wars) |empire | (strikes + back)",
+                "(star + | wars |) | ----empire | + --(strikes + | --back) \\"
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>
+        /// Expected shape: (star AND wars) OR (empire OR (strikes AND back AND NOT jarjar)).
+        /// The expected tree is assembled from the innermost query outwards.
+        /// </summary>
+        [Test]
+        public void TestComplex05()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery notJarJar = new BooleanQuery();
+            notJarJar.Add(new TermQuery(new Term("field", "jarjar")), BooleanClause.Occur.MUST_NOT);
+            notJarJar.Add(new MatchAllDocsQuery(), should);
+
+            BooleanQuery strikesBack = new BooleanQuery();
+            strikesBack.Add(new TermQuery(new Term("field", "strikes")), must);
+            strikesBack.Add(new TermQuery(new Term("field", "back")), must);
+            strikesBack.Add(notJarJar, must);
+
+            BooleanQuery empireSide = new BooleanQuery();
+            empireSide.Add(new TermQuery(new Term("field", "empire")), should);
+            empireSide.Add(strikesBack, should);
+
+            BooleanQuery starWars = new BooleanQuery();
+            starWars.Add(new TermQuery(new Term("field", "star")), must);
+            starWars.Add(new TermQuery(new Term("field", "wars")), must);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(starWars, should);
+            expected.Add(empireSide, should);
+
+            string[] inputs =
+            {
+                "(star wars) | (empire | (strikes back -jarjar))",
+                "(star + wars) |(empire | (strikes + back -jarjar) () )",
+                "(star + | wars |) | --(--empire | + --(strikes + | --back + -jarjar) \"\" ) \""
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>
+        /// Expected shape: star AND (wars OR ((empire OR strikes) AND back AND "jar+|jar")),
+        /// where the last term contains escaped operator characters.
+        /// The expected tree is assembled from the innermost query outwards.
+        /// </summary>
+        [Test]
+        public void TestComplex06()
+        {
+            var must = BooleanClause.Occur.MUST;
+            var should = BooleanClause.Occur.SHOULD;
+
+            BooleanQuery empireOrStrikes = new BooleanQuery();
+            empireOrStrikes.Add(new TermQuery(new Term("field", "empire")), should);
+            empireOrStrikes.Add(new TermQuery(new Term("field", "strikes")), should);
+
+            BooleanQuery required = new BooleanQuery();
+            required.Add(empireOrStrikes, must);
+            required.Add(new TermQuery(new Term("field", "back")), must);
+            required.Add(new TermQuery(new Term("field", "jar+|jar")), must);
+
+            BooleanQuery warsSide = new BooleanQuery();
+            warsSide.Add(new TermQuery(new Term("field", "wars")), should);
+            warsSide.Add(required, should);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(new TermQuery(new Term("field", "star")), must);
+            expected.Add(warsSide, must);
+
+            string[] inputs =
+            {
+                "star (wars | (empire | strikes back jar\\+\\|jar))",
+                "star + (wars |(empire | strikes + back jar\\+\\|jar) () )",
+                "star + (| wars | | --(--empire | + --strikes + | --back + jar\\+\\|jar) \"\" ) \""
+            };
+            foreach (string input in inputs)
+            {
+                assertEquals(expected, Parse(input));
+            }
+        }
+
+        /// <summary>
+        /// One word parsed against two weighted fields: one boosted SHOULD clause per field.
+        /// </summary>
+        [Test]
+        public void TestWeightedTerm()
+        {
+            IDictionary<string, float> weights = new Dictionary<string, float>
+            {
+                { "field0", 5f },
+                { "field1", 10f }
+            };
+
+            Query inField0 = new TermQuery(new Term("field0", "foo"));
+            inField0.Boost = 5f;
+            Query inField1 = new TermQuery(new Term("field1", "foo"));
+            inField1.Boost = 10f;
+
+            BooleanQuery expected = new BooleanQuery(true);
+            expected.Add(inField0, BooleanClause.Occur.SHOULD);
+            expected.Add(inField1, BooleanClause.Occur.SHOULD);
+
+            SimpleQueryParser parser = new SimpleQueryParser(new MockAnalyzer(Random()), weights);
+            assertEquals(expected, parser.Parse("foo"));
+        }
+
+        /// <summary>
+        /// Two OR'd words parsed against two weighted fields: each word expands to its
+        /// own per-field boolean query, and the two are OR'd together.
+        /// </summary>
+        [Test]
+        public void testWeightedOR()
+        {
+            IDictionary<string, float> weights = new Dictionary<string, float>
+            {
+                { "field0", 5f },
+                { "field1", 10f }
+            };
+            var should = BooleanClause.Occur.SHOULD;
+
+            // per-field expansion of "foo"
+            Query fooInField0 = new TermQuery(new Term("field0", "foo"));
+            fooInField0.Boost = 5f;
+            Query fooInField1 = new TermQuery(new Term("field1", "foo"));
+            fooInField1.Boost = 10f;
+            BooleanQuery foo = new BooleanQuery(true);
+            foo.Add(fooInField0, should);
+            foo.Add(fooInField1, should);
+
+            // per-field expansion of "bar"
+            Query barInField0 = new TermQuery(new Term("field0", "bar"));
+            barInField0.Boost = 5f;
+            Query barInField1 = new TermQuery(new Term("field1", "bar"));
+            barInField1.Boost = 10f;
+            BooleanQuery bar = new BooleanQuery(true);
+            bar.Add(barInField0, should);
+            bar.Add(barInField1, should);
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(foo, should);
+            expected.Add(bar, should);
+
+            SimpleQueryParser parser = new SimpleQueryParser(new MockAnalyzer(Random()), weights);
+            assertEquals(expected, parser.Parse("foo|bar"));
+        }
+
+        /// <summary>
+        /// Parses <paramref name="text"/> against "field" (weight 1) using a keyword
+        /// <see cref="MockAnalyzer"/> and the given <paramref name="flags"/>. The
+        /// parser's default operator is left unchanged.
+        /// </summary>
+        /// <param name="text">the query string to parse</param>
+        /// <param name="flags">flags value forwarded to <see cref="SimpleQueryParser"/></param>
+        /// <returns>whatever the parser produces for <paramref name="text"/></returns>
+        private Query ParseKeyword(string text, int flags)
+        {
+            Analyzer keywordAnalyzer = new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false);
+            var weights = new HashMap<string, float>() { { "field", 1f } };
+            return new SimpleQueryParser(keywordAnalyzer, weights, flags).Parse(text);
+        }
+
+        /// <summary>
+        /// With every operator except PHRASE enabled, double quotes are ordinary
+        /// characters and remain part of the term text.
+        /// </summary>
+        [Test]
+        public void TestDisablePhrase()
+        {
+            Query expected = new TermQuery(new Term("field", "\"test\""));
+            // flags lists the ENABLED operators, so "disable X" is the complement of X;
+            // passing PHRASE_OPERATOR itself would enable only the phrase operator.
+            assertEquals(expected, ParseKeyword("\"test\"", ~SimpleQueryParser.PHRASE_OPERATOR));
+        }
+
+        /// <summary>
+        /// With every operator except PREFIX enabled, a trailing asterisk is an ordinary
+        /// character and remains part of the term text.
+        /// </summary>
+        [Test]
+        public void TestDisablePrefix()
+        {
+            Query expected = new TermQuery(new Term("field", "test*"));
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            assertEquals(expected, ParseKeyword("test*", ~SimpleQueryParser.PREFIX_OPERATOR));
+        }
+
+        /// <summary>
+        /// With every operator except AND enabled, '+' is an ordinary character and the
+        /// whole input stays one term.
+        /// </summary>
+        [Test]
+        public void TestDisableAND()
+        {
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            int allButAnd = ~SimpleQueryParser.AND_OPERATOR;
+
+            Query expected = new TermQuery(new Term("field", "foo+bar"));
+            assertEquals(expected, ParseKeyword("foo+bar", allButAnd));
+            expected = new TermQuery(new Term("field", "+foo+bar"));
+            assertEquals(expected, ParseKeyword("+foo+bar", allButAnd));
+        }
+
+        /// <summary>
+        /// With every operator except OR enabled, '|' is an ordinary character and the
+        /// whole input stays one term.
+        /// </summary>
+        [Test]
+        public void TestDisableOR()
+        {
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            int allButOr = ~SimpleQueryParser.OR_OPERATOR;
+
+            Query expected = new TermQuery(new Term("field", "foo|bar"));
+            assertEquals(expected, ParseKeyword("foo|bar", allButOr));
+            expected = new TermQuery(new Term("field", "|foo|bar"));
+            assertEquals(expected, ParseKeyword("|foo|bar", allButOr));
+        }
+
+        /// <summary>
+        /// With every operator except NOT enabled, a leading '-' is an ordinary character
+        /// and remains part of the term text.
+        /// </summary>
+        [Test]
+        public void TestDisableNOT()
+        {
+            Query expected = new TermQuery(new Term("field", "-foo"));
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            assertEquals(expected, ParseKeyword("-foo", ~SimpleQueryParser.NOT_OPERATOR));
+        }
+
+        /// <summary>
+        /// With every operator except PRECEDENCE enabled, parentheses are ordinary
+        /// characters and remain part of the term text.
+        /// </summary>
+        [Test]
+        public void TestDisablePrecedence()
+        {
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            int allButPrecedence = ~SimpleQueryParser.PRECEDENCE_OPERATORS;
+
+            Query expected = new TermQuery(new Term("field", "(foo)"));
+            assertEquals(expected, ParseKeyword("(foo)", allButPrecedence));
+            expected = new TermQuery(new Term("field", ")foo("));
+            assertEquals(expected, ParseKeyword(")foo(", allButPrecedence));
+        }
+
+        /// <summary>
+        /// With every operator except ESCAPE enabled, a backslash is an ordinary
+        /// character. Parentheses and quotes must still be active for the second and
+        /// third inputs to reduce to the same term.
+        /// </summary>
+        [Test]
+        public void TestDisableEscape()
+        {
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            int allButEscape = ~SimpleQueryParser.ESCAPE_OPERATOR;
+
+            Query expected = new TermQuery(new Term("field", "foo\\bar"));
+            assertEquals(expected, ParseKeyword("foo\\bar", allButEscape));
+            assertEquals(expected, ParseKeyword("(foo\\bar)", allButEscape));
+            assertEquals(expected, ParseKeyword("\"foo\\bar\"", allButEscape));
+        }
+
+        /// <summary>
+        /// With every operator except WHITESPACE enabled, whitespace does not split the
+        /// input, so the keyword analyzer sees the whole string as one term.
+        /// </summary>
+        [Test]
+        public void TestDisableWhitespace()
+        {
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            int allButWhitespace = ~SimpleQueryParser.WHITESPACE_OPERATOR;
+
+            Query expected = new TermQuery(new Term("field", "foo foo"));
+            assertEquals(expected, ParseKeyword("foo foo", allButWhitespace));
+            expected = new TermQuery(new Term("field", " foo foo\n "));
+            assertEquals(expected, ParseKeyword(" foo foo\n ", allButWhitespace));
+            expected = new TermQuery(new Term("field", "\t\tfoo foo foo"));
+            assertEquals(expected, ParseKeyword("\t\tfoo foo foo", allButWhitespace));
+        }
+
+        /// <summary>
+        /// With every operator except FUZZY enabled, a "~N" suffix is ordinary text and
+        /// remains part of the term.
+        /// </summary>
+        [Test]
+        public void TestDisableFuzziness()
+        {
+            Query expected = new TermQuery(new Term("field", "foo~1"));
+            // flags lists the ENABLED operators, so "disable X" is the complement of X
+            assertEquals(expected, ParseKeyword("foo~1", ~SimpleQueryParser.FUZZY_OPERATOR));
+        }
+
+        /// <summary>
+        /// With every operator except NEAR enabled, the phrase is still recognised but
+        /// the "~2" after it becomes a separate required term instead of a slop value.
+        /// </summary>
+        [Test]
+        public void TestDisableSlop()
+        {
+            PhraseQuery expectedPhrase = new PhraseQuery();
+            expectedPhrase.Add(new Term("field", "foo"));
+            expectedPhrase.Add(new Term("field", "bar"));
+
+            BooleanQuery expected = new BooleanQuery();
+            expected.Add(expectedPhrase, BooleanClause.Occur.MUST);
+            expected.Add(new TermQuery(new Term("field", "~2")), BooleanClause.Occur.MUST);
+            // flags lists the ENABLED operators, so "disable X" is the complement of X;
+            // the phrase operator has to stay on for the expected PhraseQuery to appear.
+            assertEquals(expected, Parse("\"foo bar\"~2", ~SimpleQueryParser.NEAR_OPERATOR));
+        }
+
+        /// <summary>
+        /// Feeds random unicode strings to both helpers; the only expectation is that
+        /// no exception is thrown.
+        /// </summary>
+        [Test]
+        public void TestRandomQueries()
+        {
+            int remaining = 1000;
+            while (remaining-- > 0)
+            {
+                string randomText = TestUtil.RandomUnicodeString(Random());
+                Parse(randomText); // no exception
+
+                // drawn after the first parse so the random sequence is consumed in the same order
+                int randomFlags = TestUtil.NextInt(Random(), 0, 1024);
+                ParseKeyword(randomText, randomFlags); // no exception
+            }
+        }
+
+        /// <summary>
+        /// Feeds short random strings built from a small alphabet of letters, digits and
+        /// operator-like characters to both helpers; the only expectation is that no
+        /// exception is thrown.
+        /// </summary>
+        [Test]
+        public void testRandomQueries2()
+        {
+            char[] chars = new char[] { 'a', '1', '|', '&', ' ', '(', ')', '"', '-', '~' };
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < 1000; i++)
+            {
+                sb.Length = 0; // reuse the builder between iterations
+                int queryLength = Random().Next(20);
+                for (int j = 0; j < queryLength; j++)
+                {
+                    sb.Append(chars[Random().Next(chars.Length)]);
+                }
+
+                // build the string once and hand the same instance to both parsers
+                string query = sb.ToString();
+                Parse(query); // no exception
+                ParseKeyword(query, TestUtil.NextInt(Random(), 0, 1024)); // no exception
+            }
+        }
+    }
+}