You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2011/07/17 18:31:31 UTC
[Lucene.Net] svn commit: r1147678 [1/2] - in
/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers: ./
AR/ Filters/ Miscellaneous/ Payloads/ Properties/ Shingle/ Shingle/Codec/
Shingle/Matrix/
Author: digy
Date: Sun Jul 17 16:31:29 2011
New Revision: 1147678
URL: http://svn.apache.org/viewvc?rev=1147678&view=rev
Log:
[LUCENENET-437] for 2.9.4g
Added:
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleFilter.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/TokenPositioner.cs
Removed:
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/
Modified:
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs?rev=1147678&r1=1147677&r2=1147678&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs Sun Jul 17 16:31:29 2011
@@ -80,7 +80,7 @@ namespace Lucene.Net.Analysis.AR
{
this.matchVersion = matchVersion;
- using (StreamReader reader = new StreamReader(System.Reflection.Assembly.GetAssembly(this.GetType()).GetManifestResourceStream("Lucene.Net.Analyzers.AR." + DEFAULT_STOPWORD_FILE)))
+ using (StreamReader reader = new StreamReader(System.Reflection.Assembly.GetAssembly(this.GetType()).GetManifestResourceStream("Lucene.Net.Analysis.AR." + DEFAULT_STOPWORD_FILE)))
{
while (!reader.EndOfStream)
{
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1147678&r1=1147677&r2=1147678&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj Sun Jul 17 16:31:29 2011
@@ -8,7 +8,7 @@
<ProjectGuid>{4286E961-9143-4821-B46D-3D39D3736386}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
- <RootNamespace>Lucene.Net.Analyzers</RootNamespace>
+ <RootNamespace>Lucene.Net.Analysis</RootNamespace>
<AssemblyName>Lucene.Net.Analyzers</AssemblyName>
<TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
@@ -22,7 +22,7 @@
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
- <OutputPath>..\..\..\bin\contrib\Analyzers\Debug\</OutputPath>
+ <OutputPath>..\..\..\bin\contrib\Analyzers\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
@@ -60,10 +60,15 @@
<Compile Include="De\GermanStemFilter.cs" />
<Compile Include="De\GermanStemmer.cs" />
<Compile Include="De\WordlistLoader.cs" />
- <Compile Include="Filters\ChainedFilter.cs" />
+ <Compile Include="Miscellaneous\ChainedFilter.cs" />
<Compile Include="Fr\FrenchAnalyzer.cs" />
<Compile Include="Fr\FrenchStemFilter.cs" />
<Compile Include="Fr\FrenchStemmer.cs" />
+ <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
+ <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
+ <Compile Include="Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
+ <Compile Include="Miscellaneous\PrefixAwareTokenStream.cs" />
+ <Compile Include="Miscellaneous\SingleTokenTokenStream.cs" />
<Compile Include="NGram\EdgeNGramTokenFilter.cs" />
<Compile Include="NGram\EdgeNGramTokenizer.cs" />
<Compile Include="NGram\NGramTokenFilter.cs" />
@@ -72,6 +77,7 @@
<Compile Include="Nl\DutchStemFilter.cs" />
<Compile Include="Nl\DutchStemmer.cs" />
<Compile Include="Nl\WordlistLoader.cs" />
+ <Compile Include="Payloads\PayloadHelper.cs" />
<Compile Include="Ru\RussianAnalyzer.cs" />
<Compile Include="Ru\RussianCharsets.cs" />
<Compile Include="Ru\RussianLetterTokenizer.cs" />
@@ -79,6 +85,18 @@
<Compile Include="Ru\RussianStemFilter.cs" />
<Compile Include="Ru\RussianStemmer.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Shingle\Codec\OneDimensionalNonWeightedTokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Codec\SimpleThreeDimensionalTokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Matrix\Column.cs" />
+ <Compile Include="Shingle\Matrix\Matrix.cs" />
+ <Compile Include="Shingle\Matrix\MatrixPermutationIterator.cs" />
+ <Compile Include="Shingle\Matrix\Row.cs" />
+ <Compile Include="Shingle\ShingleAnalyzerWrapper.cs" />
+ <Compile Include="Shingle\ShingleFilter.cs" />
+ <Compile Include="Shingle\ShingleMatrixFilter.cs" />
+ <Compile Include="Shingle\TokenPositioner.cs" />
<Compile Include="WordlistLoader.cs" />
</ItemGroup>
<ItemGroup>
@@ -90,6 +108,7 @@
<Name>Lucene.Net</Name>
</ProjectReference>
</ItemGroup>
+ <ItemGroup />
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+
+ /// <summary>
+ /// <para>
+ /// Allows multiple <see cref="Filter"/>s to be chained.
+ /// Logical operations such as <b>ANDNOT</b> and <b>XOR</b>
+ /// are applied between filters. One operation can be used
+ /// for all filters, or a specific operation can be declared
+ /// for each filter.
+ /// </para>
+ /// <para>
+ /// The order in which filters are called depends on
+ /// the position of the filter in the chain. It is probably
+ /// more efficient to place the most restrictive and
+ /// least computationally intensive filters first.
+ /// </para>
+ /// </summary>
+ public class ChainedFilter : Filter
+ {
+ /// <summary>Logical operations that can be applied between the filters in the chain.</summary>
+ public enum Logic
+ {
+ /// <summary>No operation declared; DoChain falls back to DEFAULT for this value.</summary>
+ NONE = -1,
+ /// <summary>Applied through result.Or / result.InPlaceOr.</summary>
+ OR = 0,
+ /// <summary>Applied through result.And / result.InPlaceAnd.</summary>
+ AND = 1,
+ /// <summary>Applied through result.AndNot / result.InPlaceNot.</summary>
+ ANDNOT = 2,
+ /// <summary>Applied through result.Xor / result.InPlaceXor.</summary>
+ XOR = 3
+ };
+
+ /// <summary>Logical operation used when none is declared. Defaults to OR.</summary>
+ public const Logic DEFAULT = Logic.OR;
+
+ /// <summary>The filter chain.</summary>
+ private Filter[] chain = null;
+
+ /// <summary>Per-filter operations; assigned only by the (Filter[], Logic[]) constructor.</summary>
+ private Logic[] logicArray;
+
+ /// <summary>Operation for all filters; remains Logic.NONE unless the (Filter[], Logic) constructor assigns it.</summary>
+ private Logic logic = Logic.NONE;
+
+        /// <summary>Creates a chain whose filters are combined with <see cref="DEFAULT"/>.</summary>
+        /// <param name="chain">The chain of filters</param>
+        public ChainedFilter(Filter[] chain)
+        {
+            this.chain = chain;
+        }
+
+        /// <summary>Creates a chain with one logical operation declared per filter.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logicArray">Logical operations to apply between filters</param>
+        public ChainedFilter(Filter[] chain, Logic[] logicArray)
+            : this(chain)
+        {
+            this.logicArray = logicArray;
+        }
+
+        /// <summary>Creates a chain in which a single logical operation is applied to every filter.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logic">Logical operation to apply to ALL filters</param>
+        public ChainedFilter(Filter[] chain, Logic logic)
+            : this(chain)
+        {
+            this.logic = logic;
+        }
+
+        /// <summary>
+        /// Evaluates the whole chain against <paramref name="reader"/>. A single declared operation
+        /// takes precedence over a per-filter array; <see cref="DEFAULT"/> is used when neither is set.
+        /// </summary>
+        /// <param name="reader">IndexReader the filters are evaluated against</param>
+        /// <returns>The combined DocIdSet</returns>
+        public override DocIdSet GetDocIdSet(IndexReader reader)
+        {
+            // A one-element array serves as a modifiable chain position shared with the helpers;
+            // an object attribute would not be thread safe.
+            int[] position = { 0 };
+
+            if (logic == Logic.NONE && logicArray != null)
+            {
+                return GetDocIdSet(reader, logicArray, position);
+            }
+            return GetDocIdSet(reader, logic != Logic.NONE ? logic : DEFAULT, position);
+        }
+
+        /// <summary>
+        /// Obtains a doc id iterator for <paramref name="filter"/> over <paramref name="reader"/>,
+        /// never returning null.
+        /// </summary>
+        /// <param name="filter">Filter to evaluate</param>
+        /// <param name="reader">Reader the filter is evaluated against</param>
+        /// <returns>
+        /// The filter's iterator, or the iterator of <c>DocIdSet.EMPTY_DOCIDSET</c> when the
+        /// filter yields a null set or a null iterator.
+        /// </returns>
+        private DocIdSetIterator GetDISI(Filter filter, IndexReader reader)
+        {
+            DocIdSet matches = filter.GetDocIdSet(reader);
+            DocIdSetIterator iterator = matches != null ? matches.Iterator() : null;
+            if (iterator != null)
+            {
+                return iterator;
+            }
+            return DocIdSet.EMPTY_DOCIDSET.Iterator();
+        }
+
+        /// <summary>
+        /// Builds the bit set the chain starts from. For AND and ANDNOT the first filter is
+        /// consumed here and <c>index[0]</c> is advanced (for ANDNOT its bits are flipped as well),
+        /// because a first AND against a completely false bit set would always return zero results.
+        /// </summary>
+        /// <param name="reader">Reader the filters are evaluated against</param>
+        /// <param name="logic">Operation that will be applied to the first filter</param>
+        /// <param name="index">One-element array holding the current position in the chain</param>
+        /// <returns>The bit set that the remaining filters are combined into</returns>
+        private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, int[] index)
+        {
+            if (logic != Logic.AND && logic != Logic.ANDNOT)
+            {
+                return new OpenBitSetDISI(reader.MaxDoc());
+            }
+
+            OpenBitSetDISI seeded = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc());
+            if (logic == Logic.ANDNOT)
+            {
+                seeded.Flip(0, reader.MaxDoc()); // NOTE: may set bits for deleted docs.
+            }
+            ++index[0];
+            return seeded;
+        }
+
+
+ /// <summary>
+ /// Returns <paramref name="result"/> unchanged; <paramref name="maxDocs"/> is not used.
+ /// (An earlier description spoke of providing a SortedVIntList when it is definitely
+ /// smaller than an OpenBitSet; this implementation performs no such conversion.)
+ /// Deprecated: either use CachingWrapperFilter, or switch to a different DocIdSet
+ /// implementation yourself. This method will be removed in Lucene 4.0.
+ /// </summary>
+ /// <param name="result">Bit set accumulated by the chain</param>
+ /// <param name="maxDocs">Ignored by this implementation</param>
+ /// <returns>The <paramref name="result"/> instance itself</returns>
+ protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
+ {
+ return result;
+ }
+
+
+        /// <summary>Applies one logical operation across every filter in the chain.</summary>
+        /// <param name="reader">IndexReader the filters are evaluated against</param>
+        /// <param name="logic">Logical operation used for all filters</param>
+        /// <param name="index">One-element array holding the current chain position</param>
+        /// <returns>The combined DocIdSet</returns>
+        private DocIdSet GetDocIdSet(IndexReader reader, Logic logic, int[] index)
+        {
+            OpenBitSetDISI combined = InitialResult(reader, logic, index);
+            while (index[0] < chain.Length)
+            {
+                DoChain(combined, logic, chain[index[0]].GetDocIdSet(reader));
+                index[0]++;
+            }
+            return FinalResult(combined, reader.MaxDoc());
+        }
+
+        /// <summary>Applies a separately declared logical operation to each filter in the chain.</summary>
+        /// <param name="reader">IndexReader the filters are evaluated against</param>
+        /// <param name="logic">Logical operations, one per filter, in chain order</param>
+        /// <param name="index">One-element array holding the current chain position</param>
+        /// <returns>The combined DocIdSet</returns>
+        /// <exception cref="ArgumentException">The array length differs from the chain length</exception>
+        private DocIdSet GetDocIdSet(IndexReader reader, Logic[] logic, int[] index)
+        {
+            if (chain.Length != logic.Length)
+            {
+                throw new ArgumentException("Invalid number of elements in logic array");
+            }
+
+            OpenBitSetDISI combined = InitialResult(reader, logic[0], index);
+            while (index[0] < chain.Length)
+            {
+                DoChain(combined, logic[index[0]], chain[index[0]].GetDocIdSet(reader));
+                index[0]++;
+            }
+            return FinalResult(combined, reader.MaxDoc());
+        }
+
+        /// <summary>
+        /// Renders the chain as <c>ChainedFilter: [f1 f2 ]</c>, each filter followed by one space.
+        /// </summary>
+        public override String ToString()
+        {
+            StringBuilder text = new StringBuilder();
+            text.Append("ChainedFilter: [");
+            foreach (Filter member in chain)
+            {
+                text.Append(member).Append(' ');
+            }
+            return text.Append(']').ToString();
+        }
+
+        /// <summary>
+        /// Folds one filter's doc id set into the running result using the given operation.
+        /// </summary>
+        /// <param name="result">Accumulated bit set; modified in place</param>
+        /// <param name="logic">Operation to apply; any value other than OR, AND, ANDNOT or XOR is handled as DEFAULT</param>
+        /// <param name="dis">Doc id set produced by a filter; a null set or null iterator is replaced by the empty set's iterator</param>
+        private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis)
+        {
+            OpenBitSet bits = dis as OpenBitSet;
+            if (bits != null)
+            {
+                // optimized case for OpenBitSets
+                switch (logic)
+                {
+                    case Logic.OR:
+                        result.Or(bits);
+                        return;
+                    case Logic.AND:
+                        result.And(bits);
+                        return;
+                    case Logic.ANDNOT:
+                        result.AndNot(bits);
+                        return;
+                    case Logic.XOR:
+                        result.Xor(bits);
+                        return;
+                    default:
+                        // Logic.NONE or any unlisted value: retry with the default operation.
+                        DoChain(result, DEFAULT, dis);
+                        return;
+                }
+            }
+
+            // General case: combine through an iterator, substituting the empty set's
+            // iterator when the filter gave no set or no iterator.
+            DocIdSetIterator disi = dis != null ? dis.Iterator() : null;
+            if (disi == null)
+            {
+                disi = DocIdSet.EMPTY_DOCIDSET.Iterator();
+            }
+
+            switch (logic)
+            {
+                case Logic.OR:
+                    result.InPlaceOr(disi);
+                    break;
+                case Logic.AND:
+                    result.InPlaceAnd(disi);
+                    break;
+                case Logic.ANDNOT:
+                    result.InPlaceNot(disi);
+                    break;
+                case Logic.XOR:
+                    result.InPlaceXor(disi);
+                    break;
+                default:
+                    // Logic.NONE or any unlisted value: retry with the default operation.
+                    DoChain(result, DEFAULT, dis);
+                    break;
+            }
+        }
+
+ }
+
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /// <summary>
+ /// A <see cref="TokenStream"/> whose <see cref="Next(Token)"/> never yields a token.
+ /// </summary>
+ public class EmptyTokenStream : TokenStream
+ {
+ /// <summary>Always returns null; <paramref name="reusableToken"/> is neither read nor modified.</summary>
+ /// <param name="reusableToken">Ignored</param>
+ /// <returns>Always null</returns>
+ [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+ public override Token Next(Token reusableToken)
+ {
+ return null;
+ }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// A <see cref="PrefixAwareTokenFilter"/> whose suffix-token update step can be supplied
+    /// as a delegate instead of by subclassing.
+    /// </summary>
+    public class InjectablePrefixAwareTokenFilter : PrefixAwareTokenFilter
+    {
+        /// <summary>Creates the filter; both streams are passed straight to the base class.</summary>
+        /// <param name="prefix">Stream handed to the base class as prefix</param>
+        /// <param name="suffix">Stream handed to the base class as suffix</param>
+        public InjectablePrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+        {
+        }
+
+        /// <summary>
+        /// Callback invoked by <see cref="UpdateSuffixToken"/> with (suffix token, last prefix token).
+        /// When it is null, the base class implementation is used instead.
+        /// </summary>
+        public Func<Token, Token, Token> UpdateAction { get; set; }
+
+        /// <summary>
+        /// Passes the tokens to <see cref="UpdateAction"/>, or to the base implementation when
+        /// no callback has been assigned (previously this case threw a NullReferenceException).
+        /// </summary>
+        /// <param name="suffixToken">a token from the suffix stream</param>
+        /// <param name="lastPrefixToken">the last token from the prefix stream</param>
+        /// <returns>The token returned by the callback or by the base implementation</returns>
+        public override Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            // Read the property once so the null check and the call see the same delegate.
+            Func<Token, Token, Token> action = UpdateAction;
+            return action != null
+                ? action(suffixToken, lastPrefixToken)
+                : base.UpdateSuffixToken(suffixToken, lastPrefixToken);
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// Joins a prefix stream, an input stream and a suffix stream by nesting two
+    /// <see cref="InjectablePrefixAwareTokenFilter"/>s: prefix + input form the inner filter,
+    /// and the inner filter + suffix form the outer one.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in Lucene.Net.Analysis.Tokenattributes.
+    /// </summary>
+    public class PrefixAndSuffixAwareTokenFilter : TokenStream
+    {
+        private readonly PrefixAwareTokenFilter _suffix;
+
+        /// <summary>Builds the inner (prefix + input) and outer (inner + suffix) filters.</summary>
+        /// <param name="prefix">Stream consumed first</param>
+        /// <param name="input">Stream consumed after the prefix; its tokens go through <see cref="UpdateInputToken"/></param>
+        /// <param name="suffix">Stream consumed last; its tokens go through <see cref="UpdateSuffixToken"/></param>
+        public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+        {
+            var prefixAndInput = new InjectablePrefixAwareTokenFilter(prefix, input);
+            prefixAndInput.UpdateAction = UpdateInputToken;
+
+            var outer = new InjectablePrefixAwareTokenFilter(prefixAndInput, suffix);
+            outer.UpdateAction = UpdateSuffixToken;
+
+            _suffix = outer;
+        }
+
+        /// <summary>Adds the end offset of the last prefix token to both offsets of an input token.</summary>
+        /// <param name="inputToken">Token from the input stream; modified and returned</param>
+        /// <param name="lastPrefixToken">Last token seen from the prefix stream</param>
+        /// <returns><paramref name="inputToken"/></returns>
+        public Token UpdateInputToken(Token inputToken, Token lastPrefixToken)
+        {
+            int shift = lastPrefixToken.EndOffset();
+            inputToken.SetStartOffset(shift + inputToken.StartOffset());
+            inputToken.SetEndOffset(shift + inputToken.EndOffset());
+            return inputToken;
+        }
+
+        /// <summary>Adds the end offset of the last input token to both offsets of a suffix token.</summary>
+        /// <param name="suffixToken">Token from the suffix stream; modified and returned</param>
+        /// <param name="lastInputToken">Last token seen from the inner (prefix + input) filter</param>
+        /// <returns><paramref name="suffixToken"/></returns>
+        public Token UpdateSuffixToken(Token suffixToken, Token lastInputToken)
+        {
+            int shift = lastInputToken.EndOffset();
+            suffixToken.SetStartOffset(shift + suffixToken.StartOffset());
+            suffixToken.SetEndOffset(shift + suffixToken.EndOffset());
+            return suffixToken;
+        }
+
+        /// <summary>Advances the stream by delegating to the outer filter.</summary>
+        public override sealed bool IncrementToken()
+        {
+            return _suffix.IncrementToken();
+        }
+
+        /// <summary>
+        /// Deprecated; will be removed in Lucene 3.0. Sealed because it should not be overridden. Delegates to the backwards compatibility layer.
+        /// </summary>
+        /// <param name="reusableToken">Token passed through to the base implementation</param>
+        /// <returns>The result of the base implementation</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /// <summary>
+        /// Deprecated; will be removed in Lucene 3.0. Sealed because it should not be overridden. Delegates to the backwards compatibility layer.
+        /// </summary>
+        /// <returns>The result of the base implementation</returns>
+        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+        public override sealed Token Next()
+        {
+            return base.Next();
+        }
+
+        /// <summary>Resets the outer filter.</summary>
+        public override void Reset()
+        {
+            _suffix.Reset();
+        }
+
+        /// <summary>Closes the outer filter.</summary>
+        public override void Close()
+        {
+            _suffix.Close();
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /// <summary>
+ /// Joins two token streams and leaves the last token of the first stream available
+ /// to be used when updating the token values in the second stream based on that token.
+ ///
+ /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+ /// <p/>
+ /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+ /// the ones located in Lucene.Net.Analysis.Tokenattributes.
+ /// </summary>
+ public class PrefixAwareTokenFilter : TokenStream
+ {
+ private readonly FlagsAttribute _flagsAtt;
+ private readonly OffsetAttribute _offsetAtt;
+ private readonly FlagsAttribute _pFlagsAtt;
+
+ private readonly OffsetAttribute _pOffsetAtt;
+ private readonly PayloadAttribute _pPayloadAtt;
+ private readonly PositionIncrementAttribute _pPosIncrAtt;
+ private readonly TermAttribute _pTermAtt;
+ private readonly TypeAttribute _pTypeAtt;
+ private readonly PayloadAttribute _payloadAtt;
+ private readonly PositionIncrementAttribute _posIncrAtt;
+
+ private readonly Token _previousPrefixToken = new Token();
+ private readonly Token _reusableToken = new Token();
+ private readonly TermAttribute _termAtt;
+ private readonly TypeAttribute _typeAtt;
+
+ private bool _prefixExhausted;
+
+ /// <summary>
+ /// Creates a filter that emits the tokens of <paramref name="prefix"/> followed by the tokens of <paramref name="suffix"/>.
+ /// </summary>
+ /// <param name="prefix">Stream read first; the _p* attributes are registered on this stream</param>
+ /// <param name="suffix">Stream read once the prefix is exhausted; also passed to the base constructor</param>
+ public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
+ {
+ Suffix = suffix;
+ Prefix = prefix;
+ _prefixExhausted = false;
+
+ // Attributes of this stream: GetNextSuffixInputToken reads them after advancing Suffix,
+ // and SetCurrentToken writes the outgoing token into them.
+ // ReSharper disable DoNotCallOverridableMethodsInConstructor
+ _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
+ _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
+ _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
+ _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
+ _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
+ _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+ // ReSharper restore DoNotCallOverridableMethodsInConstructor
+
+ // Attributes of the prefix stream, read by GetNextPrefixInputToken after advancing Prefix.
+ _pTermAtt = (TermAttribute) prefix.AddAttribute(typeof (TermAttribute));
+ _pPosIncrAtt = (PositionIncrementAttribute) prefix.AddAttribute(typeof (PositionIncrementAttribute));
+ _pPayloadAtt = (PayloadAttribute) prefix.AddAttribute(typeof (PayloadAttribute));
+ _pOffsetAtt = (OffsetAttribute) prefix.AddAttribute(typeof (OffsetAttribute));
+ _pTypeAtt = (TypeAttribute) prefix.AddAttribute(typeof (TypeAttribute));
+ _pFlagsAtt = (FlagsAttribute) prefix.AddAttribute(typeof (FlagsAttribute));
+ }
+
+ /// <summary>Stream whose tokens IncrementToken emits first.</summary>
+ public TokenStream Prefix { get; set; }
+
+ /// <summary>Stream whose tokens IncrementToken emits, via UpdateSuffixToken, once the prefix is exhausted.</summary>
+ public TokenStream Suffix { get; set; }
+
+        /// <summary>
+        /// Emits the next prefix token while the prefix stream still has one; afterwards emits
+        /// suffix tokens, each passed through <see cref="UpdateSuffixToken"/> together with the
+        /// remembered copy of the last prefix token.
+        /// </summary>
+        /// <returns>true when a token was written to this stream's attributes; false once the suffix stream has no more tokens</returns>
+        public override sealed bool IncrementToken()
+        {
+            if (!_prefixExhausted)
+            {
+                Token prefixToken = GetNextPrefixInputToken(_reusableToken);
+                if (prefixToken != null)
+                {
+                    // Remember a deep copy (payload included) for later UpdateSuffixToken calls.
+                    _previousPrefixToken.Reinit(prefixToken);
+                    Payload payload = _previousPrefixToken.GetPayload();
+                    if (payload != null)
+                    {
+                        _previousPrefixToken.SetPayload((Payload) payload.Clone());
+                    }
+                    SetCurrentToken(prefixToken);
+                    return true;
+                }
+                _prefixExhausted = true;
+            }
+
+            Token suffixToken = GetNextSuffixInputToken(_reusableToken);
+            if (suffixToken == null)
+            {
+                return false;
+            }
+
+            SetCurrentToken(UpdateSuffixToken(suffixToken, _previousPrefixToken));
+            return true;
+        }
+
+ /// <summary>
+ /// Deprecated; will be removed in Lucene 3.0. Sealed because it should not be overridden. Delegates to the backwards compatibility layer.
+ /// </summary>
+ /// <param name="reusableToken">Token passed through to the base implementation</param>
+ /// <returns>The result of the base implementation</returns>
+ [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+ public override sealed Token Next(Token reusableToken)
+ {
+ return base.Next(reusableToken);
+ }
+
+ /// <summary>
+ /// Deprecated; will be removed in Lucene 3.0. Sealed because it should not be overridden. Delegates to the backwards compatibility layer.
+ /// </summary>
+ /// <returns>The result of the base implementation</returns>
+ [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+ public override sealed Token Next()
+ {
+ return base.Next();
+ }
+
+        /// <summary>
+        /// Clears this stream's attributes and copies term, position increment, flags, offsets,
+        /// type and payload from <paramref name="token"/> into them. A null token is ignored.
+        /// </summary>
+        /// <param name="token">Token to publish, or null to leave the attributes untouched</param>
+        private void SetCurrentToken(Token token)
+        {
+            if (token != null)
+            {
+                ClearAttributes();
+                _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
+                _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
+                _flagsAtt.SetFlags(token.GetFlags());
+                _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
+                _typeAtt.SetType(token.Type());
+                _payloadAtt.SetPayload(token.GetPayload());
+            }
+        }
+
+        /// <summary>
+        /// Advances <see cref="Prefix"/> and copies the values of its attributes into <paramref name="token"/>.
+        /// </summary>
+        /// <param name="token">Token that receives the values</param>
+        /// <returns><paramref name="token"/>, or null when the prefix stream has no further token</returns>
+        private Token GetNextPrefixInputToken(Token token)
+        {
+            bool advanced = Prefix.IncrementToken();
+            if (!advanced)
+            {
+                return null;
+            }
+
+            token.SetTermBuffer(_pTermAtt.TermBuffer(), 0, _pTermAtt.TermLength());
+            token.SetPositionIncrement(_pPosIncrAtt.GetPositionIncrement());
+            token.SetFlags(_pFlagsAtt.GetFlags());
+            token.SetOffset(_pOffsetAtt.StartOffset(), _pOffsetAtt.EndOffset());
+            token.SetType(_pTypeAtt.Type());
+            token.SetPayload(_pPayloadAtt.GetPayload());
+            return token;
+        }
+
+        /// <summary>
+        /// Advances <see cref="Suffix"/> and copies the values of this stream's attributes into <paramref name="token"/>.
+        /// </summary>
+        /// <param name="token">Token that receives the values</param>
+        /// <returns><paramref name="token"/>, or null when the suffix stream has no further token</returns>
+        private Token GetNextSuffixInputToken(Token token)
+        {
+            bool advanced = Suffix.IncrementToken();
+            if (!advanced)
+            {
+                return null;
+            }
+
+            token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
+            token.SetPositionIncrement(_posIncrAtt.GetPositionIncrement());
+            token.SetFlags(_flagsAtt.GetFlags());
+            token.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.EndOffset());
+            token.SetType(_typeAtt.Type());
+            token.SetPayload(_payloadAtt.GetPayload());
+            return token;
+        }
+
+        /// <summary>
+        /// Default update step: shifts the suffix token's start and end offsets by the end offset
+        /// of the last prefix token.
+        /// </summary>
+        /// <param name="suffixToken">a token from the suffix stream; modified in place</param>
+        /// <param name="lastPrefixToken">the last token from the prefix stream</param>
+        /// <returns>the same <paramref name="suffixToken"/> instance, with shifted offsets</returns>
+        public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            int shift = lastPrefixToken.EndOffset();
+            suffixToken.SetStartOffset(shift + suffixToken.StartOffset());
+            suffixToken.SetEndOffset(shift + suffixToken.EndOffset());
+            return suffixToken;
+        }
+
+        /// <summary>
+        /// Closes the prefix stream and then the suffix stream.
+        /// </summary>
+        /// <remarks>
+        /// A stream that is currently null is skipped, matching the null checks in <see cref="Reset"/>
+        /// (both properties have public setters). The suffix stream is closed even when closing
+        /// the prefix stream throws; the exception still propagates to the caller.
+        /// </remarks>
+        public override void Close()
+        {
+            try
+            {
+                if (Prefix != null)
+                {
+                    Prefix.Close();
+                }
+            }
+            finally
+            {
+                if (Suffix != null)
+                {
+                    Suffix.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Resets the base stream, then the prefix and suffix streams where they are set.
+        /// The prefix is marked as not exhausted only when a prefix stream is present.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+
+            bool hasPrefix = Prefix != null;
+            if (hasPrefix)
+            {
+                _prefixExhausted = false;
+                Prefix.Reset();
+            }
+
+            bool hasSuffix = Suffix != null;
+            if (hasSuffix)
+            {
+                Suffix.Reset();
+            }
+        }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Diagnostics;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /// <summary>
+ /// A TokenStream containing a single token.
+ /// </summary>
+ /// <remarks>
+ /// The stream keeps a private clone of the token it is given and hands out clones from
+ /// <see cref="GetToken"/>, so callers never share a mutable instance with the stream.
+ /// </remarks>
+ public class SingleTokenTokenStream : TokenStream
+ {
+     private readonly AttributeImpl _tokenAtt;
+     private bool _exhausted;
+
+     // The token needs to be immutable, so work with clones!
+     private Token _singleToken;
+
+     /// <summary>
+     /// Creates a stream that emits a copy of <paramref name="token"/> exactly once.
+     /// </summary>
+     /// <param name="token">the token to emit; it is cloned, the caller's instance is not retained</param>
+     /// <exception cref="ArgumentNullException">if <paramref name="token"/> is null</exception>
+     public SingleTokenTokenStream(Token token)
+     {
+         // Checked in every build configuration; a Debug.Assert would vanish in release
+         // builds and leave a bare NullReferenceException from Clone().
+         if (token == null)
+             throw new ArgumentNullException("token");
+
+         _singleToken = (Token) token.Clone();
+
+         // ReSharper disable DoNotCallOverridableMethodsInConstructor
+         _tokenAtt = (AttributeImpl) AddAttribute(typeof (TermAttribute));
+         // ReSharper restore DoNotCallOverridableMethodsInConstructor
+
+         Debug.Assert(_tokenAtt is Token || _tokenAtt.GetType().Name.Equals(typeof (TokenWrapper).Name),
+                      "Token Attribute is the wrong type! Type was: " + _tokenAtt.GetType().Name + " but expected " +
+                      typeof (TokenWrapper).Name);
+     }
+
+     /// <summary>
+     /// Emits the stored token on the first call after construction or <see cref="Reset"/>.
+     /// </summary>
+     /// <returns>true when the token was copied into the stream's attribute; false once exhausted</returns>
+     public override sealed bool IncrementToken()
+     {
+         if (_exhausted)
+             return false;
+
+         ClearAttributes();
+         _singleToken.CopyTo(_tokenAtt);
+         _exhausted = true;
+
+         return true;
+     }
+
+     /// <summary>
+     /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
+     /// </summary>
+     /// <param name="reusableToken">token instance passed through to the base implementation</param>
+     /// <returns>whatever the base implementation returns</returns>
+     [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+     public override sealed Token Next(Token reusableToken)
+     {
+         return base.Next(reusableToken);
+     }
+
+     /// <summary>
+     /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
+     /// </summary>
+     /// <returns>whatever the base implementation returns</returns>
+     [Obsolete(
+         "The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API."
+         )]
+     public override sealed Token Next()
+     {
+         return base.Next();
+     }
+
+     /// <summary>
+     /// Makes the stream emit its token again on the next <see cref="IncrementToken"/> call.
+     /// </summary>
+     public override void Reset()
+     {
+         _exhausted = false;
+     }
+
+     /// <summary>
+     /// Returns a clone of the stored token.
+     /// </summary>
+     /// <returns>a fresh copy; modifying it does not affect this stream</returns>
+     public Token GetToken()
+     {
+         return (Token) _singleToken.Clone();
+     }
+
+     /// <summary>
+     /// Replaces the stored token with a clone of <paramref name="token"/>.
+     /// The exhausted state is left unchanged.
+     /// </summary>
+     /// <param name="token">the new token; it is cloned</param>
+     /// <exception cref="ArgumentNullException">if <paramref name="token"/> is null</exception>
+     public void SetToken(Token token)
+     {
+         if (token == null)
+             throw new ArgumentNullException("token");
+
+         _singleToken = (Token) token.Clone();
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Payloads
+{
+ /// <summary>
+ /// Utility methods for encoding payloads.
+ /// </summary>
+ /// <remarks>
+ /// Integers are written most significant byte first. Floats are stored as their 32 bit
+ /// pattern, encoded the same way as an integer.
+ /// </remarks>
+ public static class PayloadHelper
+ {
+     /// <summary>
+     /// Encodes a float into a newly allocated four byte array.
+     /// </summary>
+     /// <param name="payload">The value to encode</param>
+     /// <returns>a new array of length 4 holding the encoded value</returns>
+     public static byte[] EncodeFloat(float payload)
+     {
+         return EncodeFloat(payload, new byte[4], 0);
+     }
+
+     /// <summary>
+     /// Encodes a float into an existing array.
+     /// <para>NOTE: the length of the array must be at least offset + 4 long.</para>
+     /// </summary>
+     /// <param name="payload">The value to encode</param>
+     /// <param name="data">The array to write into</param>
+     /// <param name="offset">The index of the first byte to write</param>
+     /// <returns>the <paramref name="data"/> array that was passed in</returns>
+     public static byte[] EncodeFloat(float payload, byte[] data, int offset)
+     {
+         return EncodeInt(FloatToIntBits(payload), data, offset);
+     }
+
+     /// <summary>
+     /// Encodes an int into a newly allocated four byte array.
+     /// </summary>
+     /// <param name="payload">The value to encode</param>
+     /// <returns>a new array of length 4 holding the encoded value</returns>
+     public static byte[] EncodeInt(int payload)
+     {
+         return EncodeInt(payload, new byte[4], 0);
+     }
+
+     /// <summary>
+     /// Encodes an int into an existing array, most significant byte first.
+     /// <para>NOTE: the length of the array must be at least offset + 4 long.</para>
+     /// </summary>
+     /// <param name="payload">The value to encode</param>
+     /// <param name="data">The array to write into</param>
+     /// <param name="offset">The index of the first byte to write</param>
+     /// <returns>the <paramref name="data"/> array that was passed in</returns>
+     public static byte[] EncodeInt(int payload, byte[] data, int offset)
+     {
+         // The casts deliberately keep only the low eight bits. Mark them unchecked so the
+         // result does not depend on the project's overflow-checking compiler option.
+         unchecked
+         {
+             data[offset] = (byte) (payload >> 24);
+             data[offset + 1] = (byte) (payload >> 16);
+             data[offset + 2] = (byte) (payload >> 8);
+             data[offset + 3] = (byte) payload;
+         }
+         return data;
+     }
+
+     /// <summary>
+     /// Decode the payload that was encoded using EncodeFloat(float), reading from index 0.
+     /// <para>NOTE: the length of the array must be at least 4 long.</para>
+     /// </summary>
+     /// <param name="bytes">The bytes to decode</param>
+     /// <returns>the decoded float</returns>
+     public static float DecodeFloat(byte[] bytes)
+     {
+         return DecodeFloat(bytes, 0);
+     }
+
+     /// <summary>
+     /// Decode the payload that was encoded using EncodeFloat(float, byte[], int).
+     /// <para>NOTE: the length of the array must be at least offset + 4 long.</para>
+     /// </summary>
+     /// <param name="bytes">The bytes to decode</param>
+     /// <param name="offset">The offset into the array.</param>
+     /// <returns>The float that was encoded</returns>
+     public static float DecodeFloat(byte[] bytes, int offset)
+     {
+         return IntBitsToFloat(DecodeInt(bytes, offset));
+     }
+
+     /// <summary>
+     /// Decodes four bytes, most significant byte first, into an int.
+     /// <para>NOTE: the length of the array must be at least offset + 4 long.</para>
+     /// </summary>
+     /// <param name="bytes">The bytes to decode</param>
+     /// <param name="offset">The offset into the array.</param>
+     /// <returns>The int that was encoded</returns>
+     public static int DecodeInt(byte[] bytes, int offset)
+     {
+         return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
+                | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+     }
+
+     // Reinterprets the 32 bits of a float as an int (no numeric conversion).
+     private static int FloatToIntBits(float value)
+     {
+         return BitConverter.ToInt32(BitConverter.GetBytes(value), 0);
+     }
+
+     // Reinterprets the 32 bits of an int as a float (no numeric conversion).
+     private static float IntBitsToFloat(int value)
+     {
+         return BitConverter.ToSingle(BitConverter.GetBytes(value), 0);
+     }
+ }
+}
\ No newline at end of file
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs?rev=1147678&r1=1147677&r2=1147678&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs Sun Jul 17 16:31:29 2011
@@ -5,11 +5,11 @@ using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Contrib.Analyzers")]
+[assembly: AssemblyTitle("Lucene.Net.Analyzers")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("The Apache Software Foundation")]
-[assembly: AssemblyProduct("Lucene.Net.Contrib.Analyzers")]
+[assembly: AssemblyProduct("Lucene.Net.Analyzers")]
[assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
[assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
[assembly: AssemblyCulture("")]
@@ -32,5 +32,5 @@ using System.Runtime.InteropServices;
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("2.9.2.1")]
-[assembly: AssemblyFileVersion("2.9.2.1")]
+[assembly: AssemblyVersion("2.9.4.2")]
+[assembly: AssemblyFileVersion("2.9.4.2")]
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+ /// <summary>
+ /// Using this codec makes a ShingleMatrixFilter act like ShingleFilter.
+ /// It produces the most simple sort of shingles, ignoring token position increments, etc.
+ ///
+ /// It adds each token as a new column.
+ /// </summary>
+ public class OneDimensionalNonWeightedTokenSettingsCodec : TokenSettingsCodec
+ {
+     // Weight reported for every token; this codec does not store weights.
+     private const float NeutralWeight = 1f;
+
+     /// <summary>
+     /// Always positions the token in a new column, whatever the token contains.
+     /// </summary>
+     /// <param name="token">ignored</param>
+     /// <returns><c>TokenPositioner.NewColumn</c></returns>
+     public override TokenPositioner GetTokenPositioner(Token token)
+     {
+         var positioner = TokenPositioner.NewColumn;
+         return positioner;
+     }
+
+     /// <summary>
+     /// Does nothing; positioning information is not stored by this codec.
+     /// </summary>
+     /// <param name="token">ignored</param>
+     /// <param name="tokenPositioner">ignored</param>
+     public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+     {
+         // Intentionally empty.
+     }
+
+     /// <summary>
+     /// Always reports a weight of 1.
+     /// </summary>
+     /// <param name="token">ignored</param>
+     /// <returns>1f</returns>
+     public override float GetWeight(Token token)
+     {
+         return NeutralWeight;
+     }
+
+     /// <summary>
+     /// Does nothing; weights are not stored by this codec.
+     /// </summary>
+     /// <param name="token">ignored</param>
+     /// <param name="weight">ignored</param>
+     public override void SetWeight(Token token, float weight)
+     {
+         // Intentionally empty.
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+ /// <summary>
+ /// A full featured codec not to be used for something serious.
+ ///
+ /// It takes complete control of
+ /// payload for weight
+ /// and the bit flags for positioning in the matrix.
+ ///
+ /// Mainly exists for demonstration purposes.
+ /// </summary>
+ public class SimpleThreeDimensionalTokenSettingsCodec : TokenSettingsCodec
+ {
+     /// <summary>
+     /// Maps the token's flags value to a TokenPositioner:
+     /// 0 is a new column, 1 is a new row and 2 is the same row.
+     /// </summary>
+     /// <param name="token">the token whose flags are read</param>
+     /// <returns>the token flags int value as TokenPositioner</returns>
+     /// <exception cref="IOException">if the flags value is none of 0, 1 or 2</exception>
+     public override TokenPositioner GetTokenPositioner(Token token)
+     {
+         var flags = token.GetFlags();
+
+         if (flags == 0)
+         {
+             return TokenPositioner.NewColumn;
+         }
+         if (flags == 1)
+         {
+             return TokenPositioner.NewRow;
+         }
+         if (flags == 2)
+         {
+             return TokenPositioner.SameRow;
+         }
+
+         throw new IOException("Unknown matrix positioning of token " + token);
+     }
+
+     /// <summary>
+     /// Sets the TokenPositioner as token flags int value.
+     /// </summary>
+     /// <param name="token">the token whose flags are overwritten</param>
+     /// <param name="tokenPositioner">the positioner whose Index is stored in the flags</param>
+     public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+     {
+         var flags = tokenPositioner.Index;
+         token.SetFlags(flags);
+     }
+
+     /// <summary>
+     /// Returns a 32 bit float from the payload, or 1f if the payload or its data is null.
+     /// </summary>
+     /// <param name="token">the token whose payload is read</param>
+     /// <returns>the decoded weight, or 1f when there is nothing to decode</returns>
+     public override float GetWeight(Token token)
+     {
+         var payload = token.GetPayload();
+         if (payload == null)
+         {
+             return 1f;
+         }
+
+         var data = payload.GetData();
+         if (data == null)
+         {
+             return 1f;
+         }
+
+         return PayloadHelper.DecodeFloat(data);
+     }
+
+     /// <summary>
+     /// Stores a 32 bit float in the payload, or sets the payload to null if the weight is 1f.
+     /// </summary>
+     /// <param name="token">the token whose payload is replaced</param>
+     /// <param name="weight">the weight to store</param>
+     public override void SetWeight(Token token, float weight)
+     {
+         Payload encoded = null;
+         if (weight != 1f)
+         {
+             encoded = new Payload(PayloadHelper.EncodeFloat(weight));
+         }
+
+         token.SetPayload(encoded);
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+ /// <summary>
+ /// Strategy used to code and decode meta data of the tokens from the input stream
+ /// regarding how to position the tokens in the matrix, set and retrieve weight, etc.
+ /// </summary>
+ public abstract class TokenSettingsCodec
+ {
+ /// <summary>
+ /// Retrieves information on how a Token is to be inserted to a ShingleMatrixFilter.Matrix.
+ /// </summary>
+ /// <param name="token">the token to read the positioning information from</param>
+ /// <returns>the positioner describing where the token goes in the matrix</returns>
+ public abstract TokenPositioner GetTokenPositioner(Token token);
+
+ /// <summary>
+ /// Sets information on how a Token is to be inserted to a ShingleMatrixFilter.Matrix.
+ /// </summary>
+ /// <param name="token">the token to store the positioning information on</param>
+ /// <param name="tokenPositioner">the positioning information to store</param>
+ public abstract void SetTokenPositioner(Token token, TokenPositioner tokenPositioner);
+
+ /// <summary>
+ /// Have this method return 1f in order to 'disable' weights.
+ /// </summary>
+ /// <param name="token">the token to read the weight from</param>
+ /// <returns>the weight of the token</returns>
+ public abstract float GetWeight(Token token);
+
+ /// <summary>
+ /// Have this method do nothing in order to 'disable' weights.
+ /// </summary>
+ /// <param name="token">the token to store the weight on</param>
+ /// <param name="weight">the weight to store</param>
+ public abstract void SetWeight(Token token, float weight);
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+ /// <summary>
+ /// A codec that creates a two dimensional matrix
+ /// by treating tokens from the input stream with 0 position increment
+ /// as new rows to the current column.
+ /// </summary>
+ public class TwoDimensionalNonWeightedSynonymTokenSettingsCodec : TokenSettingsCodec
+ {
+     /// <summary>
+     /// Derives the position from the token's position increment.
+     /// </summary>
+     /// <param name="token">the token whose position increment is read</param>
+     /// <returns>
+     /// <c>TokenPositioner.NewRow</c> when the increment is 0, otherwise <c>TokenPositioner.NewColumn</c>
+     /// </returns>
+     public override TokenPositioner GetTokenPositioner(Token token)
+     {
+         if (token.GetPositionIncrement() == 0)
+         {
+             return TokenPositioner.NewRow;
+         }
+
+         return TokenPositioner.NewColumn;
+     }
+
+     /// <summary>
+     /// Not supported by this codec.
+     /// </summary>
+     /// <param name="token">unused</param>
+     /// <param name="tokenPositioner">unused</param>
+     /// <exception cref="NotSupportedException">always</exception>
+     public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+     {
+         throw new NotSupportedException();
+     }
+
+     /// <summary>
+     /// Always reports a weight of 1.
+     /// </summary>
+     /// <param name="token">ignored</param>
+     /// <returns>1f</returns>
+     public override float GetWeight(Token token)
+     {
+         const float neutralWeight = 1f;
+         return neutralWeight;
+     }
+
+     /// <summary>
+     /// Does nothing; weights are not stored by this codec.
+     /// </summary>
+     /// <param name="token">ignored</param>
+     /// <param name="weight">ignored</param>
+     public override void SetWeight(Token token, float weight)
+     {
+         // Intentionally empty.
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+ /// <summary>
+ /// A column of a <see cref="Matrix"/>. A column holds a list of rows and registers itself
+ /// with the matrix it is created for.
+ /// </summary>
+ public class Column
+ {
+     /// <summary>
+     /// Creates a column containing a single row that holds the given token.
+     /// </summary>
+     /// <param name="token">the token placed in the column's first row</param>
+     /// <param name="enclosingInstance">the matrix the column is appended to</param>
+     public Column(Token token, Matrix enclosingInstance)
+         : this(enclosingInstance)
+     {
+         // The Row constructor adds the new row to this column's Rows list.
+         var row = new Row(this);
+         row.Tokens.AddLast(token);
+     }
+
+     /// <summary>
+     /// Creates an empty column and appends it to the given matrix.
+     /// </summary>
+     /// <param name="enclosingInstance">the matrix the column is appended to</param>
+     /// <exception cref="ArgumentNullException">if <paramref name="enclosingInstance"/> is null</exception>
+     public Column(Matrix enclosingInstance)
+     {
+         if (enclosingInstance == null)
+             throw new ArgumentNullException("enclosingInstance");
+
+         Rows = new List<Row>();
+         Matrix = enclosingInstance;
+
+         // The "is this the first column?" check and the append must happen under the same
+         // lock; otherwise two columns created concurrently could both see an empty list.
+         lock (Matrix)
+         {
+             if (Matrix.Columns.Count == 0)
+                 IsFirst = true;
+
+             Matrix.Columns.Add(this);
+         }
+     }
+
+     /// <summary>The matrix this column belongs to.</summary>
+     public Matrix Matrix { get; private set; }
+
+     /// <summary>The rows of this column, in insertion order.</summary>
+     public List<Row> Rows { get; private set; }
+
+     /// <summary>
+     /// The position of this column in the matrix, found by a linear search of
+     /// <c>Matrix.Columns</c>.
+     /// </summary>
+     public int Index
+     {
+         get { return Matrix.Columns.IndexOf(this); }
+     }
+
+     /// <summary>Set to true by the constructor when the matrix had no columns yet.</summary>
+     public bool IsFirst { get; set; }
+
+     /// <summary>Not assigned anywhere in this class; maintained by callers.</summary>
+     public bool IsLast { get; set; }
+
+     /// <summary>
+     /// Returns a debugging representation listing the flags and every row.
+     /// </summary>
+     public override String ToString()
+     {
+         // Concatenating the List itself would only print its CLR type name,
+         // so render each row explicitly.
+         var renderedRows = Rows.ConvertAll(row => row.ToString()).ToArray();
+
+         return "Column{" +
+                "first=" + IsFirst +
+                ", last=" + IsLast +
+                ", rows=[" + string.Join(", ", renderedRows) + "]" +
+                '}';
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+ /// <summary>
+ /// A column focused matrix in three dimensions:
+ ///
+ /// <pre>
+ /// Token[column][row][z-axis] {
+ ///     {{hello}, {greetings, and, salutations}},
+ ///     {{world}, {earth}, {tellus}}
+ /// };
+ /// </pre>
+ ///
+ /// todo consider row groups
+ /// to indicate that shingles is only to contain permutations with texts in that same row group.
+ /// </summary>
+ public class Matrix
+ {
+     /// <summary>
+     /// Creates a matrix with no columns.
+     /// </summary>
+     public Matrix()
+     {
+         Columns = new List<Column>();
+     }
+
+     /// <summary>The columns of the matrix, in insertion order.</summary>
+     public List<Column> Columns { get; private set; }
+
+     /// <summary>
+     /// Creates a new iterator over this matrix.
+     /// </summary>
+     /// <returns>a new <see cref="MatrixPermutationIterator"/> bound to this matrix</returns>
+     public MatrixPermutationIterator PermutationIterator()
+     {
+         return new MatrixPermutationIterator(this);
+     }
+
+     /// <summary>
+     /// Returns a debugging representation listing every column.
+     /// </summary>
+     public override string ToString()
+     {
+         // Concatenating the List itself would only print its CLR type name,
+         // so render each column explicitly.
+         var renderedColumns = Columns.ConvertAll(column => column.ToString()).ToArray();
+
+         return "Matrix{" +
+                "columns=[" + string.Join(", ", renderedColumns) + "]" +
+                '}';
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+ /// <summary>
+ /// Iterates over every combination formed by picking one row from each column of a
+ /// <see cref="Matrix"/>. The row index of the first column advances fastest.
+ /// </summary>
+ /// <remarks>
+ /// The number of columns is captured when the iterator is created.
+ /// </remarks>
+ public class MatrixPermutationIterator
+ {
+     private readonly Matrix _enclosingInstance;
+
+     // Current row index per column; entry i belongs to column i.
+     private readonly int[] _columnRowCounters;
+
+     /// <summary>
+     /// Creates an iterator positioned at the first row of every column.
+     /// </summary>
+     /// <param name="enclosingInstance">the matrix to iterate over</param>
+     public MatrixPermutationIterator(Matrix enclosingInstance)
+     {
+         _enclosingInstance = enclosingInstance;
+         _columnRowCounters = new int[_enclosingInstance.Columns.Count];
+     }
+
+     /// <summary>
+     /// Tells whether another combination is available.
+     /// </summary>
+     /// <returns>
+     /// false when the iterator was created over zero columns, when the matrix now has fewer
+     /// columns than at creation, or when the last column's counter has run past its rows
+     /// </returns>
+     public bool HasNext()
+     {
+         var s = _columnRowCounters.Length;
+         var n = _enclosingInstance.Columns.Count;
+         return s != 0 && n >= s && _columnRowCounters[s - 1] < _enclosingInstance.Columns[s - 1].Rows.Count;
+     }
+
+     /// <summary>
+     /// Returns the current combination and advances to the next one.
+     /// </summary>
+     /// <returns>an array holding one row per column, indexed by column</returns>
+     /// <exception cref="InvalidOperationException">if there are no more elements</exception>
+     public Row[] Next()
+     {
+         // A specific exception type instead of the bare System.Exception; callers that
+         // catch Exception keep working because InvalidOperationException derives from it.
+         if (!HasNext())
+             throw new InvalidOperationException("no more elements");
+
+         var rows = new Row[_columnRowCounters.Length];
+
+         for (int i = 0; i < _columnRowCounters.Length; i++)
+         {
+             rows[i] = _enclosingInstance.Columns[i].Rows[_columnRowCounters[i]];
+         }
+
+         IncrementColumnRowCounters();
+
+         return rows;
+     }
+
+     // Advances the counters like an odometer: bump the first column and carry into the
+     // next one whenever a column wraps. The last column never wraps, which is what lets
+     // HasNext() detect the end.
+     private void IncrementColumnRowCounters()
+     {
+         for (int i = 0; i < _columnRowCounters.Length; i++)
+         {
+             _columnRowCounters[i]++;
+
+             if (_columnRowCounters[i] != _enclosingInstance.Columns[i].Rows.Count ||
+                 i >= _columnRowCounters.Length - 1)
+                 break;
+
+             _columnRowCounters[i] = 0;
+         }
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+ /// <summary>
+ /// A row of a <see cref="Column"/>. A row holds a linked list of tokens and registers
+ /// itself with the column it is created for.
+ /// </summary>
+ public class Row
+ {
+     /// <summary>
+     /// Creates an empty row and appends it to the given column's rows.
+     /// </summary>
+     /// <param name="enclosingInstance">the column the row is appended to</param>
+     public Row(Column enclosingInstance)
+     {
+         Tokens = new LinkedList<Token>();
+         Column = enclosingInstance;
+         Column.Rows.Add(this);
+     }
+
+     /// <summary>The column this row belongs to.</summary>
+     public Column Column { get; private set; }
+
+     /// <summary>
+     /// The position of this row in its column, found by a linear search of <c>Column.Rows</c>.
+     /// </summary>
+     public int Index
+     {
+         get { return Column.Rows.IndexOf(this); }
+     }
+
+     /// <summary>The tokens of this row, in insertion order.</summary>
+     public LinkedList<Token> Tokens { get; set; }
+
+     /// <summary>
+     /// Returns a debugging representation listing the row index and every token.
+     /// </summary>
+     public override string ToString()
+     {
+         // Concatenating the LinkedList itself would only print its CLR type name,
+         // so render each token explicitly. Tokens has a public setter and may be null.
+         var renderedTokens = new List<string>();
+         if (Tokens != null)
+         {
+             foreach (var token in Tokens)
+             {
+                 renderedTokens.Add(token == null ? "null" : token.ToString());
+             }
+         }
+
+         return "Row{" +
+                "index=" + Index +
+                ", tokens=[" + string.Join(", ", renderedTokens.ToArray()) + "]" +
+                '}';
+     }
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+
+namespace Lucene.Net.Analysis.Shingle
+{
+ /// <summary>
+ /// An analyzer that decorates the token streams of another analyzer with a ShingleFilter.
+ /// <para>"Shingle" is another name for a token based n-gram.</para>
+ /// </summary>
+ public class ShingleAnalyzerWrapper : Analyzer
+ {
+     // Analyzer whose token streams get wrapped by the shingle filter.
+     protected Analyzer DefaultAnalyzer;
+     // Maximum shingle size passed to every filter this wrapper hands out.
+     protected int MaxShingleSize = 2;
+     // Whether the filters are told to also emit the original tokens.
+     protected bool OutputUnigrams = true;
+
+     /// <summary>
+     /// Creates a wrapper around a new StandardAnalyzer.
+     /// </summary>
+     public ShingleAnalyzerWrapper()
+         : this(new StandardAnalyzer())
+     {
+     }
+
+     /// <summary>
+     /// Creates a wrapper around a new StandardAnalyzer with the given maximum shingle size.
+     /// </summary>
+     /// <param name="nGramSize">max shingle size</param>
+     public ShingleAnalyzerWrapper(int nGramSize)
+         : this()
+     {
+         MaxShingleSize = nGramSize;
+     }
+
+     /// <summary>
+     /// Creates a wrapper around the supplied analyzer.
+     /// </summary>
+     /// <param name="defaultAnalyzer">analyzer whose streams are wrapped</param>
+     public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer)
+     {
+         DefaultAnalyzer = defaultAnalyzer;
+         SetOverridesTokenStreamMethod(typeof (ShingleAnalyzerWrapper));
+     }
+
+     /// <summary>
+     /// Creates a wrapper around the supplied analyzer with the given maximum shingle size.
+     /// </summary>
+     /// <param name="defaultAnalyzer">analyzer whose streams are wrapped</param>
+     /// <param name="maxShingleSize">max shingle size</param>
+     public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize)
+         : this(defaultAnalyzer)
+     {
+         MaxShingleSize = maxShingleSize;
+     }
+
+     /// <summary>
+     /// Gets the max shingle (ngram) size.
+     /// </summary>
+     /// <returns>the currently configured max shingle size</returns>
+     public int GetMaxShingleSize()
+     {
+         return MaxShingleSize;
+     }
+
+     /// <summary>
+     /// Sets the maximum size of output shingles.
+     /// </summary>
+     /// <param name="maxShingleSize">max shingle size</param>
+     public void SetMaxShingleSize(int maxShingleSize)
+     {
+         MaxShingleSize = maxShingleSize;
+     }
+
+     /// <summary>
+     /// Tells whether the original tokens (the "unigrams") are passed to the output stream.
+     /// </summary>
+     /// <returns>the currently configured unigram flag</returns>
+     public bool IsOutputUnigrams()
+     {
+         return OutputUnigrams;
+     }
+
+     /// <summary>
+     /// Chooses whether the filter passes the original tokens (the "unigrams")
+     /// to the output stream.
+     /// </summary>
+     /// <param name="outputUnigrams">Whether or not the filter shall pass the original tokens to the output stream</param>
+     public void SetOutputUnigrams(bool outputUnigrams)
+     {
+         OutputUnigrams = outputUnigrams;
+     }
+
+     /// <summary>
+     /// Builds a fresh ShingleFilter over a stream obtained from the wrapped analyzer.
+     /// The wrapped analyzer's reusable stream is tried first; if that throws an
+     /// IOException its non-reusable stream is used instead.
+     /// </summary>
+     /// <param name="fieldName">field name forwarded to the wrapped analyzer</param>
+     /// <param name="reader">reader forwarded to the wrapped analyzer</param>
+     /// <returns>a new shingle filter configured with the current settings</returns>
+     public override TokenStream TokenStream(String fieldName, TextReader reader)
+     {
+         TokenStream source;
+         try
+         {
+             source = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+         }
+         catch (IOException)
+         {
+             source = DefaultAnalyzer.TokenStream(fieldName, reader);
+         }
+
+         return ApplySettings(new ShingleFilter(source));
+     }
+
+     /// <summary>
+     /// Returns a ShingleFilter that is kept between calls through the previous-token-stream
+     /// slot, rebuilding it only when the wrapped analyzer hands back a different stream.
+     /// </summary>
+     /// <param name="fieldName">field name forwarded to the wrapped analyzer</param>
+     /// <param name="reader">reader forwarded to the wrapped analyzer</param>
+     /// <returns>the shingle filter configured with the current settings</returns>
+     public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+     {
+         // LUCENE-1678: when a subclass overrides tokenStream but not
+         // reusableTokenStream we must fall back to tokenStream().
+         if (overridesTokenStreamMethod)
+         {
+             return TokenStream(fieldName, reader);
+         }
+
+         var saved = (SavedStreams) GetPreviousTokenStream();
+
+         if (saved != null)
+         {
+             var current = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+             if (current != saved.Wrapped)
+             {
+                 // the wrapped analyzer produced a new stream: wrap that one instead
+                 saved.Wrapped = current;
+                 saved.Shingle = new ShingleFilter(saved.Wrapped);
+             }
+             else
+             {
+                 // the wrapped analyzer reused its stream: reuse our filter as well
+                 saved.Shingle.Reset();
+             }
+         }
+         else
+         {
+             // first call: build the pair and store it for later reuse
+             var created = new SavedStreams();
+             created.Wrapped = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+             created.Shingle = new ShingleFilter(created.Wrapped);
+             saved = created;
+             SetPreviousTokenStream(saved);
+         }
+
+         return ApplySettings(saved.Shingle);
+     }
+
+     // Pushes the current shingle size and unigram flag onto the filter and returns it.
+     private ShingleFilter ApplySettings(ShingleFilter filter)
+     {
+         filter.SetMaxShingleSize(MaxShingleSize);
+         filter.SetOutputUnigrams(OutputUnigrams);
+         return filter;
+     }
+
+     #region Nested type: SavedStreams
+
+     // Pairs the wrapped analyzer's stream with the shingle filter built around it.
+     private class SavedStreams
+     {
+         public ShingleFilter Shingle;
+         public TokenStream Wrapped;
+     }
+
+     #endregion
+ }
+}
\ No newline at end of file