You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/09/11 21:30:55 UTC
[24/50] [abbrv] lucenenet git commit: Moved Lucene.Net.QueryParser
and Lucene.Net.Tests.QueryParser projects into src\ directory.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Ext/ParserExtension.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Ext/ParserExtension.cs b/src/Lucene.Net.QueryParser/Ext/ParserExtension.cs
new file mode 100644
index 0000000..27b9212
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Ext/ParserExtension.cs
@@ -0,0 +1,50 @@
+\ufeffusing Lucene.Net.QueryParser.Classic;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.QueryParser.Ext
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// This class represents an extension base class to the Lucene standard
+ /// <see cref="Classic.QueryParser"/>. The
+ /// <see cref="Classic.QueryParser"/> is generated by the JavaCC
+ /// parser generator. Changing or adding functionality or syntax in the standard
+ /// query parser requires changes to the JavaCC source file. To enable extending
+ /// the standard query parser without changing the JavaCC sources and re-generate
+ /// the parser the <see cref="ParserExtension"/> can be customized and plugged into an
+ /// instance of <see cref="ExtendableQueryParser"/>, a direct subclass of
+ /// <see cref="Classic.QueryParser"/>.
+ ///
+ /// <see cref="Extensions"/>
+ /// <see cref="ExtendableQueryParser"/>
+ /// </summary>
+ public abstract class ParserExtension
+ {
+ /// <summary>
+ /// Processes the given <see cref="ExtensionQuery"/> and returns a corresponding
+ /// <see cref="Query"/> instance. Subclasses must either return a <see cref="Query"/>
+ /// instance or raise a <see cref="ParseException"/>. This method must not return
+ /// <code>null</code>.
+ /// </summary>
+ /// <param name="query">the extension query</param>
+ /// <returns>a new query instance</returns>
+ /// <exception cref="ParseException">if the query can not be parsed.</exception>
+ public abstract Query Parse(ExtensionQuery query);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Flexible/Standard/CommonQueryParserConfiguration.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Flexible/Standard/CommonQueryParserConfiguration.cs b/src/Lucene.Net.QueryParser/Flexible/Standard/CommonQueryParserConfiguration.cs
new file mode 100644
index 0000000..ae3809f
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Flexible/Standard/CommonQueryParserConfiguration.cs
@@ -0,0 +1,106 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search;
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.QueryParser.Flexible.Standard
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Configuration options common across queryparser implementations.
+ /// </summary>
+ public interface ICommonQueryParserConfiguration
+ {
+ /// <summary>
+ /// Whether terms of multi-term queries (e.g., wildcard,
+ /// prefix, fuzzy and range) should be automatically
+ /// lower-cased or not. Default is <code>true</code>.
+ /// </summary>
+ bool LowercaseExpandedTerms { get; set; }
+
+ /// <summary>
+ /// Set to <code>true</code> to allow leading wildcard characters.
+ /// <p>
+ /// When set, <code>*</code> or <code>?</code> are allowed as the first
+ /// character of a PrefixQuery and WildcardQuery. Note that this can produce
+ /// very slow queries on big indexes.
+ /// <p>
+ /// Default: false.
+ /// </summary>
+ bool AllowLeadingWildcard { get; set; }
+
+ /// <summary>
+ /// Set to <code>true</code> to enable position increments in result query.
+ /// <p>
+ /// When set, result phrase and multi-phrase queries will be aware of position
+ /// increments. Useful when e.g. a StopFilter increases the position increment
+ /// of the token that follows an omitted token.
+ /// <p>
+ /// Default: false.
+ /// </summary>
+ bool EnablePositionIncrements { get; set; }
+
+ /// <summary>
+ /// By default, it uses
+ /// {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when creating a
+ /// prefix, wildcard and range queries. This implementation is generally
+ /// preferable because it a) Runs faster b) Does not have the scarcity of terms
+ /// unduly influence score c) avoids any {@link TooManyListenersException}
+ /// exception. However, if your application really needs to use the
+ /// old-fashioned boolean queries expansion rewriting and the above points are
+ /// not relevant then use this change the rewrite method.
+ /// </summary>
+ MultiTermQuery.RewriteMethod MultiTermRewriteMethod { get; set; }
+
+ /// <summary>
+ /// Get or Set the prefix length for fuzzy queries. Default is 0.
+ /// </summary>
+ int FuzzyPrefixLength { get; set; }
+
+ /// <summary>
+ /// Get or Set locale used by date range parsing.
+ /// </summary>
+ CultureInfo Locale { get; set; }
+
+ /// <summary>
+ /// Gets or Sets the time zone.
+ /// </summary>
+ TimeZoneInfo TimeZone { get; set; }
+
+ /// <summary>
+ /// Gets or Sets the default slop for phrases. If zero, then exact phrase matches are
+ /// required. Default value is zero.
+ /// </summary>
+ int PhraseSlop { get; set; }
+
+ Analyzer Analyzer { get; }
+
+ /// <summary>
+ /// Get the minimal similarity for fuzzy queries.
+ /// </summary>
+ float FuzzyMinSim { get; set; }
+
+ /// <summary>
+ /// Sets the default <see cref="T:DateTools.Resolution"/> used for certain field when
+ /// no <see cref="T:DateTools.Resolution"/> is defined for this field.
+ /// </summary>
+ void SetDateResolution(DateTools.Resolution dateResolution);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj b/src/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
new file mode 100644
index 0000000..e3f8fc6
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Lucene.Net.QueryParser.csproj
@@ -0,0 +1,107 @@
+\ufeff<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{949BA34B-6AE6-4CE3-B578-61E13E4D76BF}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.QueryParser</RootNamespace>
+ <AssemblyName>Lucene.Net.QueryParser</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Analyzing\AnalyzingQueryParser.cs" />
+ <Compile Include="Classic\CharStream.cs" />
+ <Compile Include="Classic\FastCharStream.cs" />
+ <Compile Include="Classic\MultiFieldQueryParser.cs" />
+ <Compile Include="Classic\ParseException.cs" />
+ <Compile Include="Classic\QueryParser.cs" />
+ <Compile Include="Classic\QueryParserBase.cs" />
+ <Compile Include="Classic\QueryParserConstants.cs" />
+ <Compile Include="Classic\QueryParserTokenManager.cs" />
+ <Compile Include="Classic\Token.cs" />
+ <Compile Include="Classic\TokenMgrError.cs" />
+ <Compile Include="ComplexPhrase\ComplexPhraseQueryParser.cs" />
+ <Compile Include="Ext\ExtendableQueryParser.cs" />
+ <Compile Include="Ext\ExtensionQuery.cs" />
+ <Compile Include="Ext\Extensions.cs" />
+ <Compile Include="Ext\ParserExtension.cs" />
+ <Compile Include="Flexible\Standard\CommonQueryParserConfiguration.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Simple\SimpleQueryParser.cs" />
+ <Compile Include="Surround\Parser\CharStream.cs" />
+ <Compile Include="Surround\Parser\FastCharStream.cs" />
+ <Compile Include="Surround\Parser\ParseException.cs" />
+ <Compile Include="Surround\Parser\QueryParser.cs" />
+ <Compile Include="Surround\Parser\QueryParserConstants.cs" />
+ <Compile Include="Surround\Parser\QueryParserTokenManager.cs" />
+ <Compile Include="Surround\Parser\Token.cs" />
+ <Compile Include="Surround\Parser\TokenMgrError.cs" />
+ <Compile Include="Surround\Query\AndQuery.cs" />
+ <Compile Include="Surround\Query\BasicQueryFactory.cs" />
+ <Compile Include="Surround\Query\ComposedQuery.cs" />
+ <Compile Include="Surround\Query\DistanceQuery.cs" />
+ <Compile Include="Surround\Query\DistanceRewriteQuery.cs" />
+ <Compile Include="Surround\Query\DistanceSubQuery.cs" />
+ <Compile Include="Surround\Query\FieldsQuery.cs" />
+ <Compile Include="Surround\Query\NotQuery.cs" />
+ <Compile Include="Surround\Query\OrQuery.cs" />
+ <Compile Include="Surround\Query\RewriteQuery.cs" />
+ <Compile Include="Surround\Query\SimpleTerm.cs" />
+ <Compile Include="Surround\Query\SimpleTermRewriteQuery.cs" />
+ <Compile Include="Surround\Query\SpanNearClauseFactory.cs" />
+ <Compile Include="Surround\Query\SrndBooleanQuery.cs" />
+ <Compile Include="Surround\Query\SrndPrefixQuery.cs" />
+ <Compile Include="Surround\Query\SrndQuery.cs" />
+ <Compile Include="Surround\Query\SrndTermQuery.cs" />
+ <Compile Include="Surround\Query\SrndTruncQuery.cs" />
+ <Compile Include="Surround\Query\TooManyBasicQueries.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+ <Project>{4add0bbc-b900-4715-9526-d871de8eea64}</Project>
+ <Name>Lucene.Net.Analysis.Common</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Properties/AssemblyInfo.cs b/src/Lucene.Net.QueryParser/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..023bf34
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Properties/AssemblyInfo.cs
@@ -0,0 +1,39 @@
+\ufeffusing System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.QueryParser")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.QueryParser")]
+[assembly: AssemblyCopyright("Copyright © 2016")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("7c58cf05-89dd-4c02-a948-c28cdaf05247")]
+
+// for testing
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.QueryParser")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs b/src/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs
new file mode 100644
index 0000000..1029c8b
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Simple/SimpleQueryParser.cs
@@ -0,0 +1,788 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Lucene.Net.QueryParser.Simple
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// SimpleQueryParser is used to parse human readable query syntax.
+ /// <p>
+ /// The main idea behind this parser is that a person should be able to type
+ /// whatever they want to represent a query, and this parser will do its best
+ /// to interpret what to search for no matter how poorly composed the request
+ /// may be. Tokens are considered to be any of a term, phrase, or subquery for the
+ /// operations described below. Whitespace including ' ' '\n' '\r' and '\t'
+ /// and certain operators may be used to delimit tokens ( ) + | " .
+ /// <p>
+ /// Any errors in query syntax will be ignored and the parser will attempt
+ /// to decipher what it can; however, this may mean odd or unexpected results.
+ /// <h4>Query Operators</h4>
+ /// <ul>
+ /// <li>'{@code +}' specifies {@code AND} operation: <tt>token1+token2</tt>
+ /// <li>'{@code |}' specifies {@code OR} operation: <tt>token1|token2</tt>
+ /// <li>'{@code -}' negates a single token: <tt>-token0</tt>
+ /// <li>'{@code "}' creates phrases of terms: <tt>"term1 term2 ..."</tt>
+ /// <li>'{@code *}' at the end of terms specifies prefix query: <tt>term*</tt>
+ /// <li>'{@code ~}N' at the end of terms specifies fuzzy query: <tt>term~1</tt>
+ /// <li>'{@code ~}N' at the end of phrases specifies near query: <tt>"term1 term2"~5</tt>
+ /// <li>'{@code (}' and '{@code )}' specifies precedence: <tt>token1 + (token2 | token3)</tt>
+ /// </ul>
+ /// <p>
+ /// The {@link #setDefaultOperator default operator} is {@code OR} if no other operator is specified.
+ /// For example, the following will {@code OR} {@code token1} and {@code token2} together:
+ /// <tt>token1 token2</tt>
+ /// <p>
+ /// Normal operator precedence will be simple order from right to left.
+ /// For example, the following will evaluate {@code token1 OR token2} first,
+ /// then {@code AND} with {@code token3}:
+ /// <blockquote>token1 | token2 + token3</blockquote>
+ /// <h4>Escaping</h4>
+ /// <p>
+ /// An individual term may contain any possible character with certain characters
+ /// requiring escaping using a '{@code \}'. The following characters will need to be escaped in
+ /// terms and phrases:
+ /// {@code + | " ( ) ' \}
+ /// <p>
+ /// The '{@code -}' operator is a special case. On individual terms (not phrases) the first
+ /// character of a term that is {@code -} must be escaped; however, any '{@code -}' characters
+ /// beyond the first character do not need to be escaped.
+ /// For example:
+ /// <ul>
+ /// <li>{@code -term1} -- Specifies {@code NOT} operation against {@code term1}
+ /// <li>{@code \-term1} -- Searches for the term {@code -term1}.
+ /// <li>{@code term-1} -- Searches for the term {@code term-1}.
+ /// <li>{@code term\-1} -- Searches for the term {@code term-1}.
+ /// </ul>
+ /// <p>
+ /// The '{@code *}' operator is a special case. On individual terms (not phrases) the last
+ /// character of a term that is '{@code *}' must be escaped; however, any '{@code *}' characters
+ /// before the last character do not need to be escaped:
+ /// <ul>
+ /// <li>{@code term1*} -- Searches for the prefix {@code term1}
+ /// <li>{@code term1\*} -- Searches for the term {@code term1*}
+ /// <li>{@code term*1} -- Searches for the term {@code term*1}
+ /// <li>{@code term\*1} -- Searches for the term {@code term*1}
+ /// </ul>
+ /// <p>
+ /// Note that above examples consider the terms before text processing.
+ /// </summary>
+ public class SimpleQueryParser : QueryBuilder
+ {
+ /** Map of fields to query against with their weights */
+ protected readonly IDictionary<string, float> weights;
+
+ // TODO: Make these into a [Flags] enum in .NET??
+ /** flags to the parser (to turn features on/off) */
+ protected readonly int flags;
+
+ /** Enables {@code AND} operator (+) */
+ public static readonly int AND_OPERATOR = 1<<0;
+ /** Enables {@code NOT} operator (-) */
+ public static readonly int NOT_OPERATOR = 1<<1;
+ /** Enables {@code OR} operator (|) */
+ public static readonly int OR_OPERATOR = 1<<2;
+ /** Enables {@code PREFIX} operator (*) */
+ public static readonly int PREFIX_OPERATOR = 1<<3;
+ /** Enables {@code PHRASE} operator (") */
+ public static readonly int PHRASE_OPERATOR = 1<<4;
+ /** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */
+ public static readonly int PRECEDENCE_OPERATORS = 1<<5;
+ /** Enables {@code ESCAPE} operator (\) */
+ public static readonly int ESCAPE_OPERATOR = 1<<6;
+ /** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
+ public static readonly int WHITESPACE_OPERATOR = 1<<7;
+ /** Enables {@code FUZZY} operators: (~) on single terms */
+ public static readonly int FUZZY_OPERATOR = 1<<8;
+ /** Enables {@code NEAR} operators: (~) on phrases */
+ public static readonly int NEAR_OPERATOR = 1<<9;
+
+ private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD;
+
+ /// <summary>
+ /// Creates a new parser searching over a single field.
+ /// </summary>
+ /// <param name="analyzer"></param>
+ /// <param name="field"></param>
+ public SimpleQueryParser(Analyzer analyzer, string field)
+ : this(analyzer, new HashMap<string, float>() { { field, 1.0F } })
+ {
+ }
+
+ /// <summary>
+ /// Creates a new parser searching over multiple fields with different weights.
+ /// </summary>
+ /// <param name="analyzer"></param>
+ /// <param name="weights"></param>
+ public SimpleQueryParser(Analyzer analyzer, IDictionary<string, float> weights)
+ : this(analyzer, weights, -1)
+ {
+ }
+
+ /// <summary>
+ /// Creates a new parser with custom flags used to enable/disable certain features.
+ /// </summary>
+ /// <param name="analyzer"></param>
+ /// <param name="weights"></param>
+ /// <param name="flags"></param>
+ public SimpleQueryParser(Analyzer analyzer, IDictionary<string, float> weights, int flags)
+ : base(analyzer)
+ {
+ this.weights = weights;
+ this.flags = flags;
+ }
+
+ /// <summary>
+ /// Parses the query text and returns parsed query (or null if empty)
+ /// </summary>
+ /// <param name="queryText"></param>
+ /// <returns></returns>
+ public Query Parse(string queryText)
+ {
+ char[] data = queryText.ToCharArray();
+ char[] buffer = new char[data.Length];
+
+ State state = new State(data, buffer, 0, data.Length);
+ ParseSubQuery(state);
+ return state.Top;
+ }
+
+ private void ParseSubQuery(State state)
+ {
+ while (state.Index < state.Length)
+ {
+ if (state.Data[state.Index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
+ {
+ // the beginning of a subquery has been found
+ ConsumeSubQuery(state);
+ }
+ else if (state.Data[state.Index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
+ {
+ // this is an extraneous character so it is ignored
+ ++state.Index;
+ }
+ else if (state.Data[state.Index] == '"' && (flags & PHRASE_OPERATOR) != 0)
+ {
+ // the beginning of a phrase has been found
+ ConsumePhrase(state);
+ }
+ else if (state.Data[state.Index] == '+' && (flags & AND_OPERATOR) != 0)
+ {
+ // an and operation has been explicitly set
+ // if an operation has already been set this one is ignored
+ // if a term (or phrase or subquery) has not been found yet the
+ // operation is also ignored since there is no previous
+ // term (or phrase or subquery) to and with
+ if (!state.CurrentOperationIsSet && state.Top != null)
+ {
+ state.CurrentOperation = BooleanClause.Occur.MUST;
+ }
+
+ ++state.Index;
+ }
+ else if (state.Data[state.Index] == '|' && (flags & OR_OPERATOR) != 0)
+ {
+ // an or operation has been explicitly set
+ // if an operation has already been set this one is ignored
+ // if a term (or phrase or subquery) has not been found yet the
+ // operation is also ignored since there is no previous
+ // term (or phrase or subquery) to or with
+ if (!state.CurrentOperationIsSet && state.Top != null)
+ {
+ state.CurrentOperation = BooleanClause.Occur.SHOULD;
+ }
+
+ ++state.Index;
+ }
+ else if (state.Data[state.Index] == '-' && (flags & NOT_OPERATOR) != 0)
+ {
+ // a not operator has been found, so increase the not count
+ // two not operators in a row negate each other
+ ++state.Not;
+ ++state.Index;
+
+ // continue so the not operator is not reset
+ // before the next character is determined
+ continue;
+ }
+ else if ((state.Data[state.Index] == ' '
+ || state.Data[state.Index] == '\t'
+ || state.Data[state.Index] == '\n'
+ || state.Data[state.Index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)
+ {
+ // ignore any whitespace found as it may have already been
+ // used a delimiter across a term (or phrase or subquery)
+ // or is simply extraneous
+ ++state.Index;
+ }
+ else
+ {
+ // the beginning of a token has been found
+ ConsumeToken(state);
+ }
+
+ // reset the not operator as even whitespace is not allowed when
+ // specifying the not operation for a term (or phrase or subquery)
+ state.Not = 0;
+ }
+ }
+
+ private void ConsumeSubQuery(State state)
+ {
+ Debug.Assert((flags & PRECEDENCE_OPERATORS) != 0);
+ int start = ++state.Index;
+ int precedence = 1;
+ bool escaped = false;
+
+ while (state.Index < state.Length)
+ {
+ if (!escaped)
+ {
+ if (state.Data[state.Index] == '\\' && (flags & ESCAPE_OPERATOR) != 0)
+ {
+ // an escape character has been found so
+ // whatever character is next will become
+ // part of the subquery unless the escape
+ // character is the last one in the data
+ escaped = true;
+ ++state.Index;
+
+ continue;
+ }
+ else if (state.Data[state.Index] == '(')
+ {
+ // increase the precedence as there is a
+ // subquery in the current subquery
+ ++precedence;
+ }
+ else if (state.Data[state.Index] == ')')
+ {
+ --precedence;
+
+ if (precedence == 0)
+ {
+ // this should be the end of the subquery
+ // all characters found will used for
+ // creating the subquery
+ break;
+ }
+ }
+ }
+
+ escaped = false;
+ ++state.Index;
+ }
+
+ if (state.Index == state.Length)
+ {
+ // a closing parenthesis was never found so the opening
+ // parenthesis is considered extraneous and will be ignored
+ state.Index = start;
+ }
+ else if (state.Index == start)
+ {
+ // a closing parenthesis was found immediately after the opening
+ // parenthesis so the current operation is reset since it would
+ // have been applied to this subquery
+ state.CurrentOperationIsSet = false;
+
+ ++state.Index;
+ }
+ else
+ {
+ // a complete subquery has been found and is recursively parsed by
+ // starting over with a new state object
+ State subState = new State(state.Data, state.Buffer, start, state.Index);
+ ParseSubQuery(subState);
+ BuildQueryTree(state, subState.Top);
+
+ ++state.Index;
+ }
+ }
+
+ private void ConsumePhrase(State state)
+ {
+ Debug.Assert((flags & PHRASE_OPERATOR) != 0);
+ int start = ++state.Index;
+ int copied = 0;
+ bool escaped = false;
+ bool hasSlop = false;
+
+ while (state.Index < state.Length)
+ {
+ if (!escaped)
+ {
+ if (state.Data[state.Index] == '\\' && (flags & ESCAPE_OPERATOR) != 0)
+ {
+ // an escape character has been found so
+ // whatever character is next will become
+ // part of the phrase unless the escape
+ // character is the last one in the data
+ escaped = true;
+ ++state.Index;
+
+ continue;
+ }
+ else if (state.Data[state.Index] == '"')
+ {
+ // if there are still characters after the closing ", check for a
+ // tilde
+ if (state.Length > (state.Index + 1) &&
+ state.Data[state.Index + 1] == '~' &&
+ (flags & NEAR_OPERATOR) != 0)
+ {
+ state.Index++;
+ // check for characters after the tilde
+ if (state.Length > (state.Index + 1))
+ {
+ hasSlop = true;
+ }
+ break;
+ }
+ else
+ {
+ // this should be the end of the phrase
+ // all characters found will used for
+ // creating the phrase query
+ break;
+ }
+ }
+ }
+
+ escaped = false;
+ state.Buffer[copied++] = state.Data[state.Index++];
+ }
+
+ if (state.Index == state.Length)
+ {
+ // a closing double quote was never found so the opening
+ // double quote is considered extraneous and will be ignored
+ state.Index = start;
+ }
+ else if (state.Index == start)
+ {
+ // a closing double quote was found immediately after the opening
+ // double quote so the current operation is reset since it would
+ // have been applied to this phrase
+ state.CurrentOperationIsSet = false;
+
+ ++state.Index;
+ }
+ else
+ {
+ // a complete phrase has been found and is parsed through
+ // through the analyzer from the given field
+ string phrase = new string(state.Buffer, 0, copied);
+ Query branch;
+ if (hasSlop)
+ {
+ branch = NewPhraseQuery(phrase, ParseFuzziness(state));
+ }
+ else
+ {
+ branch = NewPhraseQuery(phrase, 0);
+ }
+ BuildQueryTree(state, branch);
+
+ ++state.Index;
+ }
+ }
+
+ private void ConsumeToken(State state)
+ {
+ int copied = 0;
+ bool escaped = false;
+ bool prefix = false;
+ bool fuzzy = false;
+
+ while (state.Index < state.Length)
+ {
+ if (!escaped)
+ {
+ if (state.Data[state.Index] == '\\' && (flags & ESCAPE_OPERATOR) != 0)
+ {
+ // an escape character has been found so
+ // whatever character is next will become
+ // part of the term unless the escape
+ // character is the last one in the data
+ escaped = true;
+ prefix = false;
+ ++state.Index;
+
+ continue;
+ }
+ else if (TokenFinished(state))
+ {
+ // this should be the end of the term
+ // all characters found will used for
+ // creating the term query
+ break;
+ }
+ else if (copied > 0 && state.Data[state.Index] == '~' && (flags & FUZZY_OPERATOR) != 0)
+ {
+ fuzzy = true;
+ break;
+ }
+
+ // wildcard tracks whether or not the last character
+ // was a '*' operator that hasn't been escaped
+ // there must be at least one valid character before
+ // searching for a prefixed set of terms
+ prefix = copied > 0 && state.Data[state.Index] == '*' && (flags & PREFIX_OPERATOR) != 0;
+ }
+
+ escaped = false;
+ state.Buffer[copied++] = state.Data[state.Index++];
+ }
+
+ if (copied > 0)
+ {
+ Query branch;
+
+ if (fuzzy && (flags & FUZZY_OPERATOR) != 0)
+ {
+ string token = new string(state.Buffer, 0, copied);
+ int fuzziness = ParseFuzziness(state);
+ // edit distance has a maximum, limit to the maximum supported
+ fuzziness = Math.Min(fuzziness, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
+ if (fuzziness == 0)
+ {
+ branch = NewDefaultQuery(token);
+ }
+ else
+ {
+ branch = NewFuzzyQuery(token, fuzziness);
+ }
+ }
+ else if (prefix)
+ {
+ // if a term is found with a closing '*' it is considered to be a prefix query
+ // and will have prefix added as an option
+ string token = new string(state.Buffer, 0, copied - 1);
+ branch = NewPrefixQuery(token);
+ }
+ else
+ {
+ // a standard term has been found so it will be run through
+ // the entire analysis chain from the specified schema field
+ string token = new string(state.Buffer, 0, copied);
+ branch = NewDefaultQuery(token);
+ }
+
+ BuildQueryTree(state, branch);
+ }
+ }
+
+ /// <summary>
+ /// buildQueryTree should be called after a term, phrase, or subquery
+ /// is consumed to be added to our existing query tree
+ /// this method will only add to the existing tree if the branch contained in state is not null
+ /// </summary>
+ /// <param name="state"></param>
+ /// <param name="branch"></param>
+ private void BuildQueryTree(State state, Query branch)
+ {
+ if (branch != null)
+ {
+ // modify our branch to a BooleanQuery wrapper for not
+ // this is necessary any time a term, phrase, or subquery is negated
+ if (state.Not % 2 == 1)
+ {
+ BooleanQuery nq = new BooleanQuery();
+ nq.Add(branch, BooleanClause.Occur.MUST_NOT);
+ nq.Add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
+ branch = nq;
+ }
+
+ // first term (or phrase or subquery) found and will begin our query tree
+ if (state.Top == null)
+ {
+ state.Top = branch;
+ }
+ else
+ {
+ // more than one term (or phrase or subquery) found
+ // set currentOperation to the default if no other operation is explicitly set
+ if (!state.CurrentOperationIsSet)
+ {
+ state.CurrentOperation = defaultOperator;
+ }
+
+ // operational change requiring a new parent node
+ // this occurs if the previous operation is not the same as current operation
+ // because the previous operation must be evaluated separately to preserve
+ // the proper precedence and the current operation will take over as the top of the tree
+ if (!state.PreviousOperationIsSet || state.PreviousOperation != state.CurrentOperation)
+ {
+ BooleanQuery bq = new BooleanQuery();
+ bq.Add(state.Top, state.CurrentOperation);
+ state.Top = bq;
+ }
+
+ // reset all of the state for reuse
+ ((BooleanQuery)state.Top).Add(branch, state.CurrentOperation);
+ state.PreviousOperation = state.CurrentOperation;
+ }
+
+ // reset the current operation as it was intended to be applied to
+ // the incoming term (or phrase or subquery) even if branch was null
+ // due to other possible errors
+ state.CurrentOperationIsSet = false;
+ }
+ }
+
+ /// <summary>
+ /// Helper parsing fuzziness from parsing state
+ /// </summary>
+ /// <param name="state"></param>
+ /// <returns>slop/edit distance, or 0 in the case of a non-parsable slop/edit string</returns>
+ private int ParseFuzziness(State state)
+ {
+ char[] slopText = new char[state.Length];
+ int slopLength = 0;
+
+ if (state.Data[state.Index] == '~')
+ {
+ while (state.Index < state.Length)
+ {
+ state.Index++;
+ // it's possible that the ~ was at the end, so check after incrementing
+ // to make sure we don't go out of bounds
+ if (state.Index < state.Length)
+ {
+ if (TokenFinished(state))
+ {
+ break;
+ }
+ slopText[slopLength] = state.Data[state.Index];
+ slopLength++;
+ }
+ }
+ int fuzziness = 0;
+ int.TryParse(new string(slopText, 0, slopLength), out fuzziness);
+ // negative -> 0
+ if (fuzziness < 0)
+ {
+ fuzziness = 0;
+ }
+ return fuzziness;
+ }
+ return 0;
+ }
+
+ /// <summary>
+ /// Helper returning true if the state has reached the end of a token.
+ /// </summary>
+ /// <param name="state"></param>
+ /// <returns></returns>
+ private bool TokenFinished(State state)
+ {
+ if ((state.Data[state.Index] == '"' && (flags & PHRASE_OPERATOR) != 0)
+ || (state.Data[state.Index] == '|' && (flags & OR_OPERATOR) != 0)
+ || (state.Data[state.Index] == '+' && (flags & AND_OPERATOR) != 0)
+ || (state.Data[state.Index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
+ || (state.Data[state.Index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
+ || ((state.Data[state.Index] == ' '
+ || state.Data[state.Index] == '\t'
+ || state.Data[state.Index] == '\n'
+ || state.Data[state.Index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0))
+ {
+ return true;
+ }
+ return false;
+ }
+
+ /// <summary>
+ /// Factory method to generate a standard query (no phrase or prefix operators).
+ /// </summary>
+ /// <param name="text"></param>
+ /// <returns></returns>
+ protected virtual Query NewDefaultQuery(string text)
+ {
+ BooleanQuery bq = new BooleanQuery(true);
+ foreach (var entry in weights)
+ {
+ Query q = CreateBooleanQuery(entry.Key, text, defaultOperator);
+ if (q != null)
+ {
+ q.Boost = entry.Value;
+ bq.Add(q, BooleanClause.Occur.SHOULD);
+ }
+ }
+ return Simplify(bq);
+ }
+
+ /// <summary>
+ /// Factory method to generate a fuzzy query.
+ /// </summary>
+ /// <param name="text"></param>
+ /// <param name="fuzziness"></param>
+ /// <returns></returns>
+ protected virtual Query NewFuzzyQuery(string text, int fuzziness)
+ {
+ BooleanQuery bq = new BooleanQuery(true);
+ foreach (var entry in weights)
+ {
+ Query q = new FuzzyQuery(new Term(entry.Key, text), fuzziness);
+ if (q != null)
+ {
+ q.Boost = entry.Value;
+ bq.Add(q, BooleanClause.Occur.SHOULD);
+ }
+ }
+ return Simplify(bq);
+ }
+
+ /// <summary>
+ /// Factory method to generate a phrase query with slop.
+ /// </summary>
+ /// <param name="text"></param>
+ /// <param name="slop"></param>
+ /// <returns></returns>
+ protected virtual Query NewPhraseQuery(string text, int slop)
+ {
+ BooleanQuery bq = new BooleanQuery(true);
+ foreach (var entry in weights)
+ {
+ Query q = CreatePhraseQuery(entry.Key, text, slop);
+ if (q != null)
+ {
+ q.Boost = entry.Value;
+ bq.Add(q, BooleanClause.Occur.SHOULD);
+ }
+ }
+ return Simplify(bq);
+ }
+
+ /// <summary>
+ /// Factory method to generate a prefix query.
+ /// </summary>
+ /// <param name="text"></param>
+ /// <returns></returns>
+ protected virtual Query NewPrefixQuery(string text)
+ {
+ BooleanQuery bq = new BooleanQuery(true);
+ foreach (var entry in weights)
+ {
+ PrefixQuery prefix = new PrefixQuery(new Term(entry.Key, text));
+ prefix.Boost = entry.Value;
+ bq.Add(prefix, BooleanClause.Occur.SHOULD);
+ }
+ return Simplify(bq);
+ }
+
+ /// <summary>
+ /// Helper to simplify boolean queries with 0 or 1 clause
+ /// </summary>
+ /// <param name="bq"></param>
+ /// <returns></returns>
+ protected virtual Query Simplify(BooleanQuery bq)
+ {
+ if (!bq.Clauses.Any())
+ {
+ return null;
+ }
+ else if (bq.Clauses.Length == 1)
+ {
+ return bq.Clauses[0].Query;
+ }
+ else
+ {
+ return bq;
+ }
+ }
+
+ /// <summary>
+ /// Gets or Sets the implicit operator setting, which will be
+ /// either <c>SHOULD</c> or <c>MUST</c>.
+ /// </summary>
+ public virtual BooleanClause.Occur DefaultOperator
+ {
+ get { return defaultOperator; }
+ set { defaultOperator = value; }
+ }
+
+
+ public class State
+ {
+ //private readonly char[] data; // the characters in the query string
+ //private readonly char[] buffer; // a temporary buffer used to reduce necessary allocations
+ //private int index;
+ //private int length;
+
+ private BooleanClause.Occur currentOperation;
+ private BooleanClause.Occur previousOperation;
+ //private int not;
+
+ //private Query top;
+
+ internal State(char[] data, char[] buffer, int index, int length)
+ {
+ this.Data = data;
+ this.Buffer = buffer;
+ this.Index = index;
+ this.Length = length;
+ }
+
+ public char[] Data { get; protected set; } // the characters in the query string
+ public char[] Buffer { get; protected set; } // a temporary buffer used to reduce necessary allocations
+ public int Index { get; set; }
+ public int Length { get; protected set; }
+
+ public BooleanClause.Occur CurrentOperation
+ {
+ get
+ {
+ return currentOperation;
+ }
+ set
+ {
+ currentOperation = value;
+ CurrentOperationIsSet = true;
+ }
+ }
+
+ public BooleanClause.Occur PreviousOperation
+ {
+ get
+ {
+ return previousOperation;
+ }
+ set
+ {
+ previousOperation = value;
+ PreviousOperationIsSet = true;
+ }
+ }
+
+ public bool CurrentOperationIsSet { get; set; }
+ public bool PreviousOperationIsSet { get; set; }
+
+ public int Not { get; set; }
+ public Query Top { get; set; }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Surround/Parser/CharStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Surround/Parser/CharStream.cs b/src/Lucene.Net.QueryParser/Surround/Parser/CharStream.cs
new file mode 100644
index 0000000..bfb2fc2
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Surround/Parser/CharStream.cs
@@ -0,0 +1,134 @@
+\ufeffusing System;
+
+namespace Lucene.Net.QueryParser.Surround.Parser
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// This interface describes a character stream that maintains line and
+ /// column number positions of the characters. It also has the capability
+ /// to backup the stream to some extent. An implementation of this
+ /// interface is used in the TokenManager implementation generated by
+ /// JavaCCParser.
+ ///
+ /// All the methods except backup can be implemented in any fashion. backup
+ /// needs to be implemented correctly for the correct operation of the lexer.
+ /// The rest of the methods are all used to get information like line number,
+ /// column number and the String that constitutes a token and are not used
+ /// by the lexer. Hence their implementation won't affect the generated lexer's
+ /// operation.
+ /// </summary>
+ public interface ICharStream
+ {
+ /// <summary>
+ /// Returns the next character from the selected input. The method
+ /// of selecting the input is the responsibility of the class
+ /// implementing this interface. May throw a System.IO.IOException.
+ /// </summary>
+ char ReadChar();
+
+ /// <summary>
+ /// Returns the column position of the character last read.
+ /// </summary>
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="EndColumn">
+ /// </seealso>
+ [Obsolete]
+ int Column { get; }
+
+ /// <summary>
+ /// Returns the line number of the character last read.
+ /// </summary>
+ /// <deprecated>
+ /// </deprecated>
+ /// <seealso cref="EndLine">
+ /// </seealso>
+ [Obsolete]
+ int Line { get; }
+
+ /// <summary>
+ /// Returns the column number of the last character for current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int EndColumn { get; }
+
+ /// <summary>
+ /// Returns the line number of the last character for current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int EndLine { get; }
+
+ /// <summary>
+ /// Returns the column number of the first character for current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int BeginColumn { get; }
+
+ /// <summary>
+ /// Returns the line number of the first character for current token (being
+ /// matched after the last call to BeginToken).
+ /// </summary>
+ int BeginLine { get; }
+
+ /// <summary>
+ /// Backs up the input stream by amount steps. Lexer calls this method if it
+ /// had already read some characters, but could not use them to match a
+ /// (longer) token. So, they will be used again as the prefix of the next
+ /// token and it is the implementation's responsibility to do this right.
+ /// </summary>
+ void Backup(int amount);
+
+ /// <summary>
+ /// Returns the next character that marks the beginning of the next token.
+ /// All characters must remain in the buffer between two successive calls
+ /// to this method to implement backup correctly.
+ /// </summary>
+ char BeginToken();
+
+ /// <summary>
+ /// Returns a string made up of characters from the marked token beginning
+ /// to the current buffer position. Implementations have the choice of returning
+ /// anything that they want to. For example, for efficiency, one might decide
+ /// to just return null, which is a valid implementation.
+ /// </summary>
+ string Image { get; }
+
+ /// <summary>
+ /// Returns an array of characters that make up the suffix of length 'len' for
+ /// the currently matched token. This is used to build up the matched string
+ /// for use in actions in the case of MORE. A simple and inefficient
+ /// implementation of this is as follows :
+ ///
+ /// {
+ /// String t = GetImage();
+ /// return t.substring(t.length() - len, t.length()).toCharArray();
+ /// }
+ /// </summary>
+ char[] GetSuffix(int len);
+
+ /// <summary>
+ /// The lexer calls this function to indicate that it is done with the stream
+ /// and hence implementations can free any resources held by this class.
+ /// Again, the body of this function can be just empty and it will not
+ /// affect the lexer's operation.
+ /// </summary>
+ void Done();
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Surround/Parser/FastCharStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Surround/Parser/FastCharStream.cs b/src/Lucene.Net.QueryParser/Surround/Parser/FastCharStream.cs
new file mode 100644
index 0000000..b33bd83
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Surround/Parser/FastCharStream.cs
@@ -0,0 +1,158 @@
+\ufeffusing System;
+
+namespace Lucene.Net.QueryParser.Surround.Parser
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// An efficient implementation of JavaCC's CharStream interface. <p/>Note that
+ /// this does not do line-number counting, but instead keeps track of the
+ /// character position of the token in the input, as required by Lucene's <see cref="Lucene.Net.Analysis.Token" />
+ /// API.
+ /// </summary>
+ public sealed class FastCharStream : ICharStream
+ {
+ internal char[] buffer = null;
+
+ internal int bufferLength = 0; // end of valid chars
+ internal int bufferPosition = 0; // next char to read
+
+ internal int tokenStart = 0; // offset in buffer
+ internal int bufferStart = 0; // position in file of buffer
+
+ internal System.IO.TextReader input; // source of chars
+
+ /// <summary>
+ /// Constructs from a Reader.
+ /// </summary>
+ public FastCharStream(System.IO.TextReader r)
+ {
+ input = r;
+ }
+
+ public char ReadChar()
+ {
+ if (bufferPosition >= bufferLength)
+ Refill();
+ return buffer[bufferPosition++];
+ }
+
+ private void Refill()
+ {
+ int newPosition = bufferLength - tokenStart;
+
+ if (tokenStart == 0)
+ {
+ // token won't fit in buffer
+ if (buffer == null)
+ {
+ // first time: alloc buffer
+ buffer = new char[2048];
+ }
+ else if (bufferLength == buffer.Length)
+ {
+ // grow buffer
+ char[] newBuffer = new char[buffer.Length * 2];
+ Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ }
+ else
+ {
+ // shift token to front
+ Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
+ }
+
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart;
+ tokenStart = 0;
+
+ int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
+ if (charsRead <= 0)
+ throw new System.IO.IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public char BeginToken()
+ {
+ tokenStart = bufferPosition;
+ return ReadChar();
+ }
+
+ public void Backup(int amount)
+ {
+ bufferPosition -= amount;
+ }
+
+ public string Image
+ {
+ get { return new System.String(buffer, tokenStart, bufferPosition - tokenStart); }
+ }
+
+ public char[] GetSuffix(int len)
+ {
+ char[] value_Renamed = new char[len];
+ Array.Copy(buffer, bufferPosition - len, value_Renamed, 0, len);
+ return value_Renamed;
+ }
+
+ public void Done()
+ {
+ try
+ {
+ input.Close();
+ }
+ catch (System.IO.IOException e)
+ {
+ System.Console.Error.WriteLine("Caught: " + e + "; ignoring.");
+ }
+ }
+
+ public int Column
+ {
+ get { return bufferStart + bufferPosition; }
+ }
+
+ public int Line
+ {
+ get { return 1; }
+ }
+
+ public int EndColumn
+ {
+ get { return bufferStart + bufferPosition; }
+ }
+
+ public int EndLine
+ {
+ get { return 1; }
+ }
+
+ public int BeginColumn
+ {
+ get { return bufferStart + tokenStart; }
+ }
+
+ public int BeginLine
+ {
+ get { return 1; }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/679ad24c/src/Lucene.Net.QueryParser/Surround/Parser/ParseException.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.QueryParser/Surround/Parser/ParseException.cs b/src/Lucene.Net.QueryParser/Surround/Parser/ParseException.cs
new file mode 100644
index 0000000..1716658
--- /dev/null
+++ b/src/Lucene.Net.QueryParser/Surround/Parser/ParseException.cs
@@ -0,0 +1,234 @@
+\ufeffusing System;
+using System.Text;
+
+namespace Lucene.Net.QueryParser.Surround.Parser
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// This exception is thrown when parse errors are encountered.
+ /// You can explicitly create objects of this exception type by
+ /// calling the method GenerateParseException in the generated
+ /// parser.
+ ///
+ /// You can modify this class to customize your error reporting
+ /// mechanisms so long as you retain the public fields.
+ /// </summary>
+ [Serializable]
+ public class ParseException : Exception
+ {
+ /// <summary>
+ /// This constructor is used by the method "GenerateParseException"
+ /// in the generated parser. Calling this constructor generates
+ /// a new object of this type with the fields "currentToken",
+ /// "expectedTokenSequences", and "tokenImage" set.
+ /// </summary>
+ /// <param name="currentTokenVal"></param>
+ /// <param name="expectedTokenSequencesVal"></param>
+ /// <param name="tokenImageVal"></param>
+ public ParseException(Token currentTokenVal,
+ int[][] expectedTokenSequencesVal,
+ string[] tokenImageVal)
+ : base(Initialize(currentTokenVal, expectedTokenSequencesVal, tokenImageVal))
+ {
+ currentToken = currentTokenVal;
+ expectedTokenSequences = expectedTokenSequencesVal;
+ tokenImage = tokenImageVal;
+ }
+
+ /**
+ * The following constructors are for use by you for whatever
+ * purpose you can think of. Constructing the exception in this
+ * manner makes the exception behave in the normal way - i.e., as
+ * documented in the class "Throwable". The fields "errorToken",
+ * "expectedTokenSequences", and "tokenImage" do not contain
+ * relevant information. The JavaCC generated code does not use
+ * these constructors.
+ */
+
+ public ParseException()
+ { }
+
+ public ParseException(string message)
+ : base(message)
+ { }
+
+ public ParseException(string message, Exception innerException)
+ : base(message, innerException)
+ { }
+
+
+ /// <summary>
+ /// This is the last token that has been consumed successfully. If
+ /// this object has been created due to a parse error, the token
+ /// following this token will (therefore) be the first error token.
+ /// </summary>
+ public Token currentToken;
+
+ /// <summary>
+ /// Each entry in this array is an array of integers. Each array
+ /// of integers represents a sequence of tokens (by their ordinal
+ /// values) that is expected at this point of the parse.
+ /// </summary>
+ public int[][] expectedTokenSequences;
+
+ /// <summary>
+ /// This is a reference to the "tokenImage" array of the generated
+ /// parser within which the parse error occurred. This array is
+ /// defined in the generated ...Constants interface.
+ /// </summary>
+ public string[] tokenImage;
+
+
+ /// <summary>
+ /// It uses "currentToken" and "expectedTokenSequences" to generate a parse
+ /// error message and returns it. If this object has been created
+ /// due to a parse error, and you do not catch it (it gets thrown
+ /// from the parser) the correct error message
+ /// gets displayed.
+ /// </summary>
+ /// <param name="currentToken"></param>
+ /// <param name="expectedTokenSequences"></param>
+ /// <param name="tokenImage"></param>
+ /// <returns></returns>
+ private static string Initialize(Token currentToken,
+ int[][] expectedTokenSequences,
+ string[] tokenImage)
+ {
+
+ StringBuilder expected = new StringBuilder();
+ int maxSize = 0;
+ for (int i = 0; i < expectedTokenSequences.Length; i++)
+ {
+ if (maxSize < expectedTokenSequences[i].Length)
+ {
+ maxSize = expectedTokenSequences[i].Length;
+ }
+ for (int j = 0; j < expectedTokenSequences[i].Length; j++)
+ {
+ expected.Append(tokenImage[expectedTokenSequences[i][j]]).Append(' ');
+ }
+ if (expectedTokenSequences[i][expectedTokenSequences[i].Length - 1] != 0)
+ {
+ expected.Append("...");
+ }
+ expected.Append(eol).Append(" ");
+ }
+ string retval = "Encountered \"";
+ Token tok = currentToken.next;
+ for (int i = 0; i < maxSize; i++)
+ {
+ if (i != 0)
+ retval += " ";
+ if (tok.kind == 0)
+ {
+ retval += tokenImage[0];
+ break;
+ }
+ retval += (" " + tokenImage[tok.kind]);
+ retval += " \"";
+ retval += Add_escapes(tok.image);
+ retval += " \"";
+ tok = tok.next;
+ }
+ retval += ("\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn);
+ retval += ("." + eol);
+ if (expectedTokenSequences.Length == 1)
+ {
+ retval += ("Was expecting:" + eol + " ");
+ }
+ else
+ {
+ retval += ("Was expecting one of:" + eol + " ");
+ }
+ retval += expected.ToString();
+ return retval;
+ }
+
+ /// <summary>
+ /// The end of line string for this machine.
+ /// </summary>
+ protected static string eol = Environment.NewLine;
+
+ /// <summary>
+ /// Used to convert raw characters to their escaped version
+ /// when these raw versions cannot be used as part of an ASCII
+ /// string literal.
+ /// </summary>
+ internal static string Add_escapes(string str)
+ {
+ StringBuilder retval = new StringBuilder();
+ char ch;
+ for (int i = 0; i < str.Length; i++)
+ {
+ switch (str[i])
+ {
+
+ case (char)(0):
+ continue;
+
+ case '\b':
+ retval.Append("\\b");
+ continue;
+
+ case '\t':
+ retval.Append("\\t");
+ continue;
+
+ case '\n':
+ retval.Append("\\n");
+ continue;
+
+ case '\f':
+ retval.Append("\\f");
+ continue;
+
+ case '\r':
+ retval.Append("\\r");
+ continue;
+
+ case '\"':
+ retval.Append("\\\"");
+ continue;
+
+ case '\'':
+ retval.Append("\\\'");
+ continue;
+
+ case '\\':
+ retval.Append("\\\\");
+ continue;
+
+ default:
+ if ((ch = str[i]) < 0x20 || ch > 0x7e)
+ {
+ System.String s = "0000" + System.Convert.ToString(ch, 16);
+ retval.Append("\\u" + s.Substring(s.Length - 4, (s.Length) - (s.Length - 4)));
+ }
+ else
+ {
+ retval.Append(ch);
+ }
+ continue;
+
+ }
+ }
+ return retval.ToString();
+ }
+ }
+}
\ No newline at end of file