You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by th...@apache.org on 2011/07/17 04:46:03 UTC
[Lucene.Net] svn commit: r1147514 [1/3] - in /incubator/lucene.net/trunk:
src/contrib/Analyzers/ src/contrib/Analyzers/Miscellaneous/
src/contrib/Analyzers/Payloads/ src/contrib/Analyzers/Shingle/
src/contrib/Analyzers/Shingle/Codec/ src/contrib/Analyzers/Shingle/M...
Author: thoward
Date: Sun Jul 17 02:46:00 2011
New Revision: 1147514
URL: http://svn.apache.org/viewvc?rev=1147514&view=rev
Log:
[LUCENENET-437] Initial port of Contrib.Shingle/Contrib.Miscellaneous and tests (also includes some helpers)
Added:
incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/
incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Payloads/
incubator/lucene.net/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Column.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Row.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/TokenPositioner.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Util/
incubator/lucene.net/trunk/src/contrib/Analyzers/Util/FloatHelper.cs
incubator/lucene.net/trunk/src/contrib/Analyzers/Util/ListComparer.cs
incubator/lucene.net/trunk/test/contrib/Analyzers/Miscellaneous/
incubator/lucene.net/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAndSuffixAwareTokenFilter.cs
incubator/lucene.net/trunk/test/contrib/Analyzers/Miscellaneous/TestPrefixAwareTokenFilter.cs
incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/
incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/ShingleAnalyzerWrapperTest.cs
incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/ShingleFilterTest.cs
incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs
Modified:
incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1147514&r1=1147513&r2=1147514&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj Sun Jul 17 02:46:00 2011
@@ -63,6 +63,11 @@
<Compile Include="Fr\FrenchAnalyzer.cs" />
<Compile Include="Fr\FrenchStemFilter.cs" />
<Compile Include="Fr\FrenchStemmer.cs" />
+ <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
+ <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
+ <Compile Include="Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
+ <Compile Include="Miscellaneous\PrefixAwareTokenStream.cs" />
+ <Compile Include="Miscellaneous\SingleTokenTokenStream.cs" />
<Compile Include="NGram\EdgeNGramTokenFilter.cs" />
<Compile Include="NGram\EdgeNGramTokenizer.cs" />
<Compile Include="NGram\NGramTokenFilter.cs" />
@@ -71,6 +76,7 @@
<Compile Include="Nl\DutchStemFilter.cs" />
<Compile Include="Nl\DutchStemmer.cs" />
<Compile Include="Nl\WordlistLoader.cs" />
+ <Compile Include="Payloads\PayloadHelper.cs" />
<Compile Include="Ru\RussianAnalyzer.cs" />
<Compile Include="Ru\RussianCharsets.cs" />
<Compile Include="Ru\RussianLetterTokenizer.cs" />
@@ -78,6 +84,20 @@
<Compile Include="Ru\RussianStemFilter.cs" />
<Compile Include="Ru\RussianStemmer.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Shingle\Matrix\Column.cs" />
+ <Compile Include="Shingle\Matrix\Matrix.cs" />
+ <Compile Include="Shingle\Matrix\MatrixPermutationIterator.cs" />
+ <Compile Include="Shingle\Matrix\Row.cs" />
+ <Compile Include="Shingle\ShingleAnalyzerWrapper.cs" />
+ <Compile Include="Shingle\ShingleFilter.cs" />
+ <Compile Include="Shingle\ShingleMatrixFilter.cs" />
+ <Compile Include="Shingle\TokenPositioner.cs" />
+ <Compile Include="Shingle\Codec\OneDimensionalNonWeightedTokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Codec\SimpleThreeDimensionalTokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
+ <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
+ <Compile Include="Util\FloatHelper.cs" />
+ <Compile Include="Util\ListComparer.cs" />
<Compile Include="WordlistLoader.cs" />
</ItemGroup>
<ItemGroup>
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /// <summary>
+    /// A <see cref="TokenStream"/> that never produces any tokens.
+    /// </summary>
+    public class EmptyTokenStream : TokenStream
+    {
+        /// <summary>
+        /// Always reports end of stream. Implemented directly so that this stream can be consumed
+        /// through the IncrementToken()/AttributeSource API without going through the deprecated
+        /// <see cref="Next(Token)"/> path.
+        /// </summary>
+        /// <returns>always <c>false</c></returns>
+        public override bool IncrementToken()
+        {
+            return false;
+        }
+
+        /// <summary>
+        /// Deprecated token API, kept for existing callers.
+        /// </summary>
+        /// <param name="reusableToken">ignored</param>
+        /// <returns>always <c>null</c>, i.e. no token</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override Token Next(Token reusableToken)
+        {
+            return null;
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /// <summary>
+    /// A <see cref="PrefixAwareTokenFilter"/> whose suffix-token update step can be supplied as a
+    /// delegate through <see cref="UpdateAction"/> instead of by subclassing.
+    /// </summary>
+    public class InjectablePrefixAwareTokenFilter : PrefixAwareTokenFilter
+    {
+        /// <summary>
+        /// Creates the filter over the given prefix and suffix streams.
+        /// </summary>
+        /// <param name="prefix">stream whose tokens are emitted first</param>
+        /// <param name="suffix">stream whose tokens are emitted after the prefix is exhausted</param>
+        public InjectablePrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+        {
+        }
+
+        /// <summary>
+        /// Callback invoked with (suffixToken, lastPrefixToken); its return value becomes the emitted token.
+        /// When left <c>null</c>, the base class update is used.
+        /// </summary>
+        public Func<Token, Token, Token> UpdateAction { get; set; }
+
+        /// <summary>
+        /// Updates a suffix token using <see cref="UpdateAction"/> when one is set; otherwise defers to
+        /// the base implementation rather than failing with a NullReferenceException.
+        /// </summary>
+        /// <param name="suffixToken">a token from the suffix stream</param>
+        /// <param name="lastPrefixToken">the last token from the prefix stream</param>
+        /// <returns>the token to emit</returns>
+        public override Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            // Read the property once so a concurrent reassignment cannot slip in between check and call.
+            Func<Token, Token, Token> action = UpdateAction;
+            if (action == null)
+            {
+                return base.UpdateSuffixToken(suffixToken, lastPrefixToken);
+            }
+
+            return action(suffixToken, lastPrefixToken);
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /// <summary>
+    /// Chains a prefix stream, an input stream and a suffix stream by linking two
+    /// PrefixAwareTokenFilter instances.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in Lucene.Net.Analysis.Tokenattributes.
+    /// </summary>
+    public class PrefixAndSuffixAwareTokenFilter : TokenStream
+    {
+        // Outer filter of the chain: (prefix + input) followed by suffix.
+        private readonly PrefixAwareTokenFilter _suffix;
+
+        /// <summary>
+        /// Builds the chain: an inner filter joins <paramref name="prefix"/> and <paramref name="input"/>,
+        /// and an outer filter joins that result with <paramref name="suffix"/>.
+        /// </summary>
+        /// <param name="prefix">stream emitted first</param>
+        /// <param name="input">stream emitted second; its offsets are adjusted by <see cref="UpdateInputToken"/></param>
+        /// <param name="suffix">stream emitted last; its offsets are adjusted by <see cref="UpdateSuffixToken"/></param>
+        public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+        {
+            InjectablePrefixAwareTokenFilter prefixedInput = new InjectablePrefixAwareTokenFilter(prefix, input);
+            prefixedInput.UpdateAction = UpdateInputToken;
+
+            InjectablePrefixAwareTokenFilter fullChain = new InjectablePrefixAwareTokenFilter(prefixedInput, suffix);
+            fullChain.UpdateAction = UpdateSuffixToken;
+
+            _suffix = fullChain;
+        }
+
+        /// <summary>
+        /// Adds the end offset of the last prefix token to the start and end offsets of an input token.
+        /// </summary>
+        /// <param name="inputToken">token from the input stream; modified in place</param>
+        /// <param name="lastPrefixToken">last token seen from the prefix stream</param>
+        /// <returns>the same <paramref name="inputToken"/> instance</returns>
+        public Token UpdateInputToken(Token inputToken, Token lastPrefixToken)
+        {
+            return ShiftOffsets(inputToken, lastPrefixToken);
+        }
+
+        /// <summary>
+        /// Adds the end offset of the last input token to the start and end offsets of a suffix token.
+        /// </summary>
+        /// <param name="suffixToken">token from the suffix stream; modified in place</param>
+        /// <param name="lastInputToken">last token seen from the prefix+input chain</param>
+        /// <returns>the same <paramref name="suffixToken"/> instance</returns>
+        public Token UpdateSuffixToken(Token suffixToken, Token lastInputToken)
+        {
+            return ShiftOffsets(suffixToken, lastInputToken);
+        }
+
+        // Moves both offsets of `token` forward by the end offset of `precedingToken`.
+        private static Token ShiftOffsets(Token token, Token precedingToken)
+        {
+            int shiftedStart = precedingToken.EndOffset() + token.StartOffset();
+            token.SetStartOffset(shiftedStart);
+
+            int shiftedEnd = precedingToken.EndOffset() + token.EndOffset();
+            token.SetEndOffset(shiftedEnd);
+
+            return token;
+        }
+
+        /// <summary>
+        /// Advances the underlying filter chain by one token.
+        /// </summary>
+        /// <returns>whatever the chained filter's IncrementToken() returns</returns>
+        public override sealed bool IncrementToken()
+        {
+            bool advanced = _suffix.IncrementToken();
+            return advanced;
+        }
+
+        /// <summary>
+        /// Deprecated; sealed because it should not be overridden. Simply forwards to the base class.
+        /// </summary>
+        /// <param name="reusableToken">token instance passed through to the base implementation</param>
+        /// <returns>the result of the base implementation</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            Token produced = base.Next(reusableToken);
+            return produced;
+        }
+
+        /// <summary>
+        /// Deprecated; sealed because it should not be overridden. Simply forwards to the base class.
+        /// </summary>
+        /// <returns>the result of the base implementation</returns>
+        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+        public override sealed Token Next()
+        {
+            Token produced = base.Next();
+            return produced;
+        }
+
+        /// <summary>
+        /// Resets the underlying filter chain.
+        /// </summary>
+        public override void Reset()
+        {
+            _suffix.Reset();
+        }
+
+        /// <summary>
+        /// Closes the underlying filter chain.
+        /// </summary>
+        public override void Close()
+        {
+            _suffix.Close();
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /// <summary>
+    /// Joins two token streams and leaves the last token of the first stream available
+    /// to be used when updating the token values in the second stream based on that token.
+    ///
+    /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in Lucene.Net.Analysis.Tokenattributes.
+    /// </summary>
+    public class PrefixAwareTokenFilter : TokenStream
+    {
+        // Attributes registered on this stream. They are read after Suffix.IncrementToken() and written
+        // by SetCurrentToken(); presumably they are shared with the suffix stream through base(suffix)
+        // (NOTE(review): confirm against the TokenStream/AttributeSource constructor).
+        private readonly FlagsAttribute _flagsAtt;
+        private readonly OffsetAttribute _offsetAtt;
+        private readonly PayloadAttribute _payloadAtt;
+        private readonly PositionIncrementAttribute _posIncrAtt;
+        private readonly TermAttribute _termAtt;
+        private readonly TypeAttribute _typeAtt;
+
+        // Attributes registered on the prefix stream that was passed to the constructor.
+        private readonly FlagsAttribute _pFlagsAtt;
+        private readonly OffsetAttribute _pOffsetAtt;
+        private readonly PayloadAttribute _pPayloadAtt;
+        private readonly PositionIncrementAttribute _pPosIncrAtt;
+        private readonly TermAttribute _pTermAtt;
+        private readonly TypeAttribute _pTypeAtt;
+
+        // Copy of the most recent prefix token, handed to UpdateSuffixToken for every suffix token.
+        private readonly Token _previousPrefixToken = new Token();
+
+        // Scratch token reused for every token read from either stream.
+        private readonly Token _reusableToken = new Token();
+
+        private bool _prefixExhausted;
+
+        /// <summary>
+        /// Creates a filter that emits all tokens of <paramref name="prefix"/> followed by all tokens
+        /// of <paramref name="suffix"/>.
+        /// </summary>
+        /// <param name="prefix">stream emitted first; must not be null</param>
+        /// <param name="suffix">stream emitted after the prefix is exhausted</param>
+        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
+        {
+            Suffix = suffix;
+            Prefix = prefix;
+            _prefixExhausted = false;
+
+            // ReSharper disable DoNotCallOverridableMethodsInConstructor
+            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
+            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
+            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
+            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
+            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
+            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+            // ReSharper restore DoNotCallOverridableMethodsInConstructor
+
+            _pTermAtt = (TermAttribute) prefix.AddAttribute(typeof (TermAttribute));
+            _pPosIncrAtt = (PositionIncrementAttribute) prefix.AddAttribute(typeof (PositionIncrementAttribute));
+            _pPayloadAtt = (PayloadAttribute) prefix.AddAttribute(typeof (PayloadAttribute));
+            _pOffsetAtt = (OffsetAttribute) prefix.AddAttribute(typeof (OffsetAttribute));
+            _pTypeAtt = (TypeAttribute) prefix.AddAttribute(typeof (TypeAttribute));
+            _pFlagsAtt = (FlagsAttribute) prefix.AddAttribute(typeof (FlagsAttribute));
+        }
+
+        /// <summary>
+        /// The stream whose tokens are emitted first. Note that the prefix attributes read by this
+        /// filter are captured from the constructor argument only.
+        /// </summary>
+        public TokenStream Prefix { get; set; }
+
+        /// <summary>
+        /// The stream whose tokens are emitted after the prefix is exhausted.
+        /// </summary>
+        public TokenStream Suffix { get; set; }
+
+        /// <summary>
+        /// Emits the next prefix token while the prefix has tokens; afterwards emits suffix tokens,
+        /// each passed through <see cref="UpdateSuffixToken"/> together with the last prefix token.
+        /// </summary>
+        /// <returns><c>true</c> if a token was produced, <c>false</c> once both streams are exhausted</returns>
+        public override sealed bool IncrementToken()
+        {
+            if (!_prefixExhausted)
+            {
+                Token nextToken = GetNextPrefixInputToken(_reusableToken);
+                if (nextToken == null)
+                {
+                    _prefixExhausted = true;
+                }
+                else
+                {
+                    _previousPrefixToken.Reinit(nextToken);
+                    // Make it a deep copy
+                    Payload p = _previousPrefixToken.GetPayload();
+                    if (p != null)
+                    {
+                        _previousPrefixToken.SetPayload((Payload) p.Clone());
+                    }
+                    SetCurrentToken(nextToken);
+                    return true;
+                }
+            }
+
+            Token nextSuffixToken = GetNextSuffixInputToken(_reusableToken);
+            if (nextSuffixToken == null)
+            {
+                return false;
+            }
+
+            nextSuffixToken = UpdateSuffixToken(nextSuffixToken, _previousPrefixToken);
+            SetCurrentToken(nextSuffixToken);
+            return true;
+        }
+
+        /// <summary>
+        /// Deprecated; sealed because it should not be overridden. Simply forwards to the base class.
+        /// </summary>
+        /// <param name="reusableToken">token instance passed through to the base implementation</param>
+        /// <returns>the result of the base implementation</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /// <summary>
+        /// Deprecated; sealed because it should not be overridden. Simply forwards to the base class.
+        /// </summary>
+        /// <returns>the result of the base implementation</returns>
+        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+        public override sealed Token Next()
+        {
+            return base.Next();
+        }
+
+        // Copies the values of `token` into this stream's attributes; a null token is ignored.
+        private void SetCurrentToken(Token token)
+        {
+            if (token == null)
+            {
+                return;
+            }
+
+            ClearAttributes();
+            _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
+            _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
+            _flagsAtt.SetFlags(token.GetFlags());
+            _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
+            _typeAtt.SetType(token.Type());
+            _payloadAtt.SetPayload(token.GetPayload());
+        }
+
+        // Advances the prefix stream and copies its attribute values into `token`;
+        // returns null when the prefix stream has no more tokens.
+        private Token GetNextPrefixInputToken(Token token)
+        {
+            if (!Prefix.IncrementToken())
+            {
+                return null;
+            }
+
+            token.SetTermBuffer(_pTermAtt.TermBuffer(), 0, _pTermAtt.TermLength());
+            token.SetPositionIncrement(_pPosIncrAtt.GetPositionIncrement());
+            token.SetFlags(_pFlagsAtt.GetFlags());
+            token.SetOffset(_pOffsetAtt.StartOffset(), _pOffsetAtt.EndOffset());
+            token.SetType(_pTypeAtt.Type());
+            token.SetPayload(_pPayloadAtt.GetPayload());
+            return token;
+        }
+
+        // Advances the suffix stream and copies this stream's attribute values into `token`;
+        // returns null when the suffix stream has no more tokens.
+        private Token GetNextSuffixInputToken(Token token)
+        {
+            if (!Suffix.IncrementToken())
+            {
+                return null;
+            }
+
+            token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
+            token.SetPositionIncrement(_posIncrAtt.GetPositionIncrement());
+            token.SetFlags(_flagsAtt.GetFlags());
+            token.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.EndOffset());
+            token.SetType(_typeAtt.Type());
+            token.SetPayload(_payloadAtt.GetPayload());
+            return token;
+        }
+
+        /// <summary>
+        /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+        /// </summary>
+        /// <param name="suffixToken">a token from the suffix stream</param>
+        /// <param name="lastPrefixToken">the last token from the prefix stream</param>
+        /// <returns>consumer token</returns>
+        public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            suffixToken.SetStartOffset(lastPrefixToken.EndOffset() + suffixToken.StartOffset());
+            suffixToken.SetEndOffset(lastPrefixToken.EndOffset() + suffixToken.EndOffset());
+            return suffixToken;
+        }
+
+        /// <summary>
+        /// Closes both underlying streams. A stream that has been set to null is skipped (matching
+        /// <see cref="Reset"/>), and the suffix is closed even when closing the prefix throws.
+        /// </summary>
+        public override void Close()
+        {
+            try
+            {
+                if (Prefix != null)
+                {
+                    Prefix.Close();
+                }
+            }
+            finally
+            {
+                if (Suffix != null)
+                {
+                    Suffix.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Resets this stream and both underlying streams (those that are not null), so that the
+        /// prefix is consumed again from its start.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+
+            if (Prefix != null)
+            {
+                _prefixExhausted = false;
+                Prefix.Reset();
+            }
+
+            if (Suffix != null)
+            {
+                Suffix.Reset();
+            }
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Diagnostics;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analyzers.Miscellaneous
+{
+    /// <summary>
+    /// A TokenStream containing a single token.
+    /// </summary>
+    public class SingleTokenTokenStream : TokenStream
+    {
+        // Attribute instance of this stream that receives a copy of the token on IncrementToken().
+        private readonly AttributeImpl _tokenAtt;
+        private bool _exhausted;
+
+        // The token needs to be immutable, so work with clones!
+        private Token _singleToken;
+
+        /// <summary>
+        /// Creates a stream that emits a private clone of <paramref name="token"/> exactly once.
+        /// </summary>
+        /// <param name="token">the token to emit; must not be null</param>
+        /// <exception cref="ArgumentNullException">if <paramref name="token"/> is null</exception>
+        public SingleTokenTokenStream(Token token)
+        {
+            // A Debug.Assert alone is compiled out of release builds, leaving a bare
+            // NullReferenceException from Clone(); fail with a descriptive exception instead.
+            if (token == null)
+            {
+                throw new ArgumentNullException("token", "Token was null!");
+            }
+
+            _singleToken = (Token) token.Clone();
+
+            // ReSharper disable DoNotCallOverridableMethodsInConstructor
+            _tokenAtt = (AttributeImpl) AddAttribute(typeof (TermAttribute));
+            // ReSharper restore DoNotCallOverridableMethodsInConstructor
+
+            Debug.Assert(_tokenAtt is Token || _tokenAtt.GetType().Name.Equals(typeof (TokenWrapper).Name),
+                         "Token Attribute is the wrong type! Type was: " + _tokenAtt.GetType().Name + " but expected " +
+                         typeof (TokenWrapper).Name);
+        }
+
+        /// <summary>
+        /// Emits the single token on the first call after construction or <see cref="Reset"/>.
+        /// </summary>
+        /// <returns><c>true</c> for the first call, <c>false</c> afterwards until the stream is reset</returns>
+        public override sealed bool IncrementToken()
+        {
+            if (_exhausted)
+            {
+                return false;
+            }
+
+            ClearAttributes();
+            _singleToken.CopyTo(_tokenAtt);
+            _exhausted = true;
+
+            return true;
+        }
+
+        /// <summary>
+        /// Deprecated; sealed because it should not be overridden. Simply forwards to the base class.
+        /// </summary>
+        /// <param name="reusableToken">token instance passed through to the base implementation</param>
+        /// <returns>the result of the base implementation</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /// <summary>
+        /// Deprecated; sealed because it should not be overridden. Simply forwards to the base class.
+        /// </summary>
+        /// <returns>the result of the base implementation</returns>
+        [Obsolete(
+            "The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API."
+            )]
+        public override sealed Token Next()
+        {
+            return base.Next();
+        }
+
+        /// <summary>
+        /// Makes the token available again for the next call to <see cref="IncrementToken"/>.
+        /// </summary>
+        public override void Reset()
+        {
+            _exhausted = false;
+        }
+
+        /// <summary>
+        /// Returns a clone of the stored token, so callers cannot modify the stream's own copy.
+        /// </summary>
+        /// <returns>a new clone of the stored token</returns>
+        public Token GetToken()
+        {
+            return (Token) _singleToken.Clone();
+        }
+
+        /// <summary>
+        /// Replaces the stored token with a clone of <paramref name="token"/>.
+        /// </summary>
+        /// <param name="token">the new token; must not be null</param>
+        /// <exception cref="ArgumentNullException">if <paramref name="token"/> is null</exception>
+        public void SetToken(Token token)
+        {
+            if (token == null)
+            {
+                throw new ArgumentNullException("token", "Token was null!");
+            }
+
+            _singleToken = (Token) token.Clone();
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Payloads/PayloadHelper.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analyzers.Util;
+
+namespace Lucene.Net.Analyzers.Payloads
+{
+    /// <summary>
+    /// Utility methods for encoding payloads as big-endian (most significant byte first) byte sequences.
+    /// </summary>
+    public static class PayloadHelper
+    {
+        /// <summary>
+        /// Encodes a float into a newly allocated 4-byte array.
+        /// </summary>
+        /// <param name="payload">the value to encode</param>
+        /// <returns>a new 4-byte array holding the encoded value</returns>
+        public static byte[] EncodeFloat(float payload)
+        {
+            return EncodeFloat(payload, new byte[4], 0);
+        }
+
+        /// <summary>
+        /// Encodes a float into <paramref name="data"/> starting at <paramref name="offset"/>,
+        /// using the int produced by FloatHelper.FloatToIntBits.
+        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
+        /// </summary>
+        /// <param name="payload">the value to encode</param>
+        /// <param name="data">the destination array</param>
+        /// <param name="offset">index of the first byte to write</param>
+        /// <returns>the same <paramref name="data"/> array</returns>
+        public static byte[] EncodeFloat(float payload, byte[] data, int offset)
+        {
+            return EncodeInt(FloatHelper.FloatToIntBits(payload), data, offset);
+        }
+
+        /// <summary>
+        /// Encodes an int into a newly allocated 4-byte array.
+        /// </summary>
+        /// <param name="payload">the value to encode</param>
+        /// <returns>a new 4-byte array holding the encoded value</returns>
+        public static byte[] EncodeInt(int payload)
+        {
+            return EncodeInt(payload, new byte[4], 0);
+        }
+
+        /// <summary>
+        /// Writes the four bytes of <paramref name="payload"/>, most significant first, into
+        /// <paramref name="data"/> starting at <paramref name="offset"/>.
+        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
+        /// </summary>
+        /// <param name="payload">the value to encode</param>
+        /// <param name="data">the destination array</param>
+        /// <param name="offset">index of the first byte to write</param>
+        /// <returns>the same <paramref name="data"/> array</returns>
+        public static byte[] EncodeInt(int payload, byte[] data, int offset)
+        {
+            // The casts deliberately truncate to the low 8 bits. Make that explicit so the result does
+            // not depend on the project-wide overflow-checking setting (with checking enabled, a plain
+            // (byte) cast throws OverflowException for any value outside 0..255).
+            unchecked
+            {
+                data[offset] = (byte) (payload >> 24);
+                data[offset + 1] = (byte) (payload >> 16);
+                data[offset + 2] = (byte) (payload >> 8);
+                data[offset + 3] = (byte) payload;
+            }
+
+            return data;
+        }
+
+        /// <summary>
+        /// <p>Decode the payload that was encoded using EncodeFloat(float)</p>
+        /// <p>NOTE: the length of the array must be at least 4 long.</p>
+        /// </summary>
+        /// <param name="bytes">The bytes to decode</param>
+        /// <returns>the decoded float</returns>
+        public static float DecodeFloat(byte[] bytes)
+        {
+            return DecodeFloat(bytes, 0);
+        }
+
+        /// <summary>
+        /// <p>Decode the payload that was encoded using EncodeFloat(float, byte[], int)</p>
+        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
+        /// </summary>
+        /// <param name="bytes">The bytes to decode</param>
+        /// <param name="offset">The offset into the array.</param>
+        /// <returns>The float that was encoded</returns>
+        public static float DecodeFloat(byte[] bytes, int offset)
+        {
+            return FloatHelper.IntBitsToFloat(DecodeInt(bytes, offset));
+        }
+
+        /// <summary>
+        /// Reads four bytes starting at <paramref name="offset"/>, most significant first, and
+        /// combines them into an int (the inverse of EncodeInt).
+        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
+        /// </summary>
+        /// <param name="bytes">The bytes to decode</param>
+        /// <param name="offset">The offset into the array.</param>
+        /// <returns>the decoded int</returns>
+        public static int DecodeInt(byte[] bytes, int offset)
+        {
+            return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
+                   | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Shingle.Codec
+{
+    /// <summary>
+    /// Using this codec makes a ShingleMatrixFilter act like ShingleFilter.
+    /// It produces the simplest sort of shingles, ignoring token position increments, etc.
+    ///
+    /// Every token is reported as starting a new column, every weight is 1, and both setters
+    /// are no-ops.
+    /// </summary>
+    public class OneDimensionalNonWeightedTokenSettingsCodec : TokenSettingsCodec
+    {
+        /// <summary>
+        /// Reports the same positioner for every token.
+        /// </summary>
+        /// <param name="token">not inspected</param>
+        /// <returns>always <c>TokenPositioner.NewColumn</c></returns>
+        public override TokenPositioner GetTokenPositioner(Token token)
+        {
+            TokenPositioner positioner = TokenPositioner.NewColumn;
+            return positioner;
+        }
+
+        /// <summary>
+        /// Does nothing; this codec stores no positioner on the token.
+        /// </summary>
+        /// <param name="token">not modified</param>
+        /// <param name="tokenPositioner">ignored</param>
+        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+        {
+            // Intentionally empty.
+        }
+
+        /// <summary>
+        /// Reports the same weight for every token.
+        /// </summary>
+        /// <param name="token">not inspected</param>
+        /// <returns>always 1</returns>
+        public override float GetWeight(Token token)
+        {
+            const float uniformWeight = 1f;
+            return uniformWeight;
+        }
+
+        /// <summary>
+        /// Does nothing; this codec stores no weight on the token.
+        /// </summary>
+        /// <param name="token">not modified</param>
+        /// <param name="weight">ignored</param>
+        public override void SetWeight(Token token, float weight)
+        {
+            // Intentionally empty.
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analyzers.Payloads;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analyzers.Shingle.Codec
+{
+    /// <summary>
+    /// A full featured codec that is not meant for serious use.
+    ///
+    /// It takes complete control of the token payload (used for the weight)
+    /// and of the token flags (used for positioning in the matrix).
+    ///
+    /// Mainly exists for demonstration purposes.
+    /// </summary>
+    public class SimpleThreeDimensionalTokenSettingsCodec : TokenSettingsCodec
+    {
+        /// <summary>
+        /// Translates the token's flags value into a TokenPositioner.
+        /// </summary>
+        /// <param name="token">token whose flags are read</param>
+        /// <returns>NewColumn for 0, NewRow for 1, SameRow for 2</returns>
+        /// <exception cref="IOException">if the flags hold any other value</exception>
+        public override TokenPositioner GetTokenPositioner(Token token)
+        {
+            int flags = token.GetFlags();
+
+            if (flags == 0)
+            {
+                return TokenPositioner.NewColumn;
+            }
+
+            if (flags == 1)
+            {
+                return TokenPositioner.NewRow;
+            }
+
+            if (flags == 2)
+            {
+                return TokenPositioner.SameRow;
+            }
+
+            throw new IOException("Unknown matrix positioning of token " + token);
+        }
+
+        /// <summary>
+        /// Stores the index of the TokenPositioner as the token's flags value.
+        /// </summary>
+        /// <param name="token">token whose flags are overwritten</param>
+        /// <param name="tokenPositioner">positioner whose Index is stored</param>
+        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+        {
+            token.SetFlags(tokenPositioner.Index);
+        }
+
+        /// <summary>
+        /// Decodes a 32 bit float from the payload, or yields 1f if there is no payload data.
+        /// </summary>
+        /// <param name="token">token whose payload is read</param>
+        /// <returns>the decoded weight, or 1f when the payload or its data is null</returns>
+        public override float GetWeight(Token token)
+        {
+            var payload = token.GetPayload();
+            var data = payload == null ? null : payload.GetData();
+
+            return data == null
+                       ? 1f
+                       : PayloadHelper.DecodeFloat(data);
+        }
+
+        /// <summary>
+        /// Stores the weight as a 32 bit float in the payload; a weight of 1f clears the payload instead.
+        /// </summary>
+        /// <param name="token">token whose payload is replaced</param>
+        /// <param name="weight">weight to encode</param>
+        public override void SetWeight(Token token, float weight)
+        {
+            Payload encoded = null;
+
+            if (weight != 1f)
+            {
+                encoded = new Payload(PayloadHelper.EncodeFloat(weight));
+            }
+
+            token.SetPayload(encoded);
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Shingle.Codec
+{
+ /// <summary>
+ /// Strategy used to encode and decode per-token metadata from the input stream:
+ /// how to position a token in the matrix, how to set and retrieve its weight, etc.
+ /// </summary>
+ public abstract class TokenSettingsCodec
+ {
+ /// <summary>
+ /// Retrieves information on how a Token is to be inserted into a ShingleMatrixFilter.Matrix.
+ /// </summary>
+ /// <param name="token">the token to inspect</param>
+ /// <returns>the positioner describing where the token goes in the matrix</returns>
+ public abstract TokenPositioner GetTokenPositioner(Token token);
+
+ /// <summary>
+ /// Sets information on how a Token is to be inserted into a ShingleMatrixFilter.Matrix.
+ /// </summary>
+ /// <param name="token">the token to update</param>
+ /// <param name="tokenPositioner">the positioner to record for the token</param>
+ public abstract void SetTokenPositioner(Token token, TokenPositioner tokenPositioner);
+
+ /// <summary>
+ /// Retrieves the weight of a token. Have this method return 1f in order to 'disable' weights.
+ /// </summary>
+ /// <param name="token">the token to inspect</param>
+ /// <returns>the weight of the token</returns>
+ public abstract float GetWeight(Token token);
+
+ /// <summary>
+ /// Records the weight of a token. Have this method do nothing in order to 'disable' weights.
+ /// </summary>
+ /// <param name="token">the token to update</param>
+ /// <param name="weight">the weight to record for the token</param>
+ public abstract void SetWeight(Token token, float weight);
+ }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Shingle.Codec
+{
+    /// <summary>
+    /// A codec that builds a two dimensional matrix: an input token whose position
+    /// increment is 0 becomes a new row in the current column, any other token
+    /// starts a new column. Weights are not used.
+    /// </summary>
+    public class TwoDimensionalNonWeightedSynonymTokenSettingsCodec : TokenSettingsCodec
+    {
+        /// <summary>
+        /// Derives the placement from the token's position increment.
+        /// </summary>
+        /// <param name="token">token whose position increment is read</param>
+        /// <returns>NewRow when the position increment is 0, otherwise NewColumn</returns>
+        public override TokenPositioner GetTokenPositioner(Token token)
+        {
+            if (token.GetPositionIncrement() == 0)
+            {
+                return TokenPositioner.NewRow;
+            }
+
+            return TokenPositioner.NewColumn;
+        }
+
+        /// <summary>
+        /// Not supported by this codec.
+        /// </summary>
+        /// <param name="token">unused</param>
+        /// <param name="tokenPositioner">unused</param>
+        /// <exception cref="NotSupportedException">always</exception>
+        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+        {
+            throw new NotSupportedException();
+        }
+
+        /// <summary>
+        /// Weights are disabled for this codec.
+        /// </summary>
+        /// <param name="token">ignored</param>
+        /// <returns>always 1f</returns>
+        public override float GetWeight(Token token)
+        {
+            return 1f;
+        }
+
+        /// <summary>
+        /// Does nothing; this codec does not store weights.
+        /// </summary>
+        /// <param name="token">ignored</param>
+        /// <param name="weight">ignored</param>
+        public override void SetWeight(Token token, float weight)
+        {
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Column.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Column.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Column.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Column.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Shingle.Matrix
+{
+    /// <summary>
+    /// A column of the shingle matrix. A column owns an ordered list of rows and
+    /// registers itself with its matrix when it is constructed.
+    /// </summary>
+    public class Column
+    {
+        /// <summary>
+        /// Creates a column that starts out with a single row holding <paramref name="token"/>.
+        /// </summary>
+        /// <param name="token">token placed in the first row of the new column</param>
+        /// <param name="enclosingInstance">matrix the column is appended to</param>
+        public Column(Token token, Matrix enclosingInstance)
+            : this(enclosingInstance)
+        {
+            // The Row constructor adds the new row to this column's Rows list.
+            var row = new Row(this);
+            row.Tokens.AddLast(token);
+        }
+
+        /// <summary>
+        /// Creates a column without rows and appends it to <paramref name="enclosingInstance"/>.
+        /// The column is flagged as first when the matrix had no columns yet.
+        /// </summary>
+        /// <param name="enclosingInstance">matrix the column is appended to</param>
+        /// <exception cref="ArgumentNullException">if <paramref name="enclosingInstance"/> is null</exception>
+        public Column(Matrix enclosingInstance)
+        {
+            if (enclosingInstance == null)
+                throw new ArgumentNullException("enclosingInstance");
+
+            Rows = new List<Row>();
+            Matrix = enclosingInstance;
+
+            lock (Matrix)
+            {
+                if (Matrix.Columns.Count == 0)
+                    IsFirst = true;
+
+                // Registering inside the same lock keeps the emptiness check and the
+                // insertion atomic, so two columns created concurrently cannot both
+                // observe an empty matrix and both claim to be first.
+                Matrix.Columns.Add(this);
+            }
+        }
+
+        /// <summary>
+        /// The matrix this column was added to.
+        /// </summary>
+        public Matrix Matrix { get; private set; }
+
+        /// <summary>
+        /// The rows of this column, in insertion order.
+        /// </summary>
+        public List<Row> Rows { get; private set; }
+
+        /// <summary>
+        /// Position of this column in the matrix, looked up with a linear search.
+        /// </summary>
+        public int Index
+        {
+            get { return Matrix.Columns.IndexOf(this); }
+        }
+
+        /// <summary>
+        /// True when this column was created while the matrix had no columns.
+        /// </summary>
+        public bool IsFirst { get; set; }
+
+        /// <summary>
+        /// Marker for the last column; it is never assigned inside this class.
+        /// </summary>
+        public bool IsLast { get; set; }
+
+        /// <summary>
+        /// Renders the flags and every row of the column. The rows are joined
+        /// explicitly because concatenating a List only yields its type name.
+        /// </summary>
+        public override String ToString()
+        {
+            var rowTexts = Rows.ConvertAll(row => row == null ? "null" : row.ToString());
+
+            return "Column{" +
+                   "first=" + IsFirst +
+                   ", last=" + IsLast +
+                   ", rows=[" + string.Join(", ", rowTexts.ToArray()) + "]" +
+                   '}';
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analyzers.Shingle.Matrix
+{
+    /// <summary>
+    /// A column focused matrix in three dimensions:
+    ///
+    /// <pre>
+    /// Token[column][row][z-axis] {
+    ///     {{hello}, {greetings, and, salutations}},
+    ///     {{world}, {earth}, {tellus}}
+    /// };
+    /// </pre>
+    ///
+    /// todo: consider row groups, to indicate that a shingle may only contain
+    /// permutations with texts from that same row group.
+    /// </summary>
+    public class Matrix
+    {
+        /// <summary>
+        /// Creates a matrix without columns.
+        /// </summary>
+        public Matrix()
+        {
+            Columns = new List<Column>();
+        }
+
+        /// <summary>
+        /// The columns of the matrix, in insertion order.
+        /// </summary>
+        public List<Column> Columns { get; private set; }
+
+        /// <summary>
+        /// Creates a new iterator over the row permutations of this matrix.
+        /// </summary>
+        /// <returns>a fresh MatrixPermutationIterator bound to this matrix</returns>
+        public MatrixPermutationIterator PermutationIterator()
+        {
+            return new MatrixPermutationIterator(this);
+        }
+
+        /// <summary>
+        /// Renders every column of the matrix. The columns are joined explicitly
+        /// because concatenating a List only yields its type name.
+        /// </summary>
+        public override string ToString()
+        {
+            var columnTexts = Columns.ConvertAll(column => column == null ? "null" : column.ToString());
+
+            return "Matrix{" +
+                   "columns=[" + string.Join(", ", columnTexts.ToArray()) + "]" +
+                   '}';
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analyzers.Shingle.Matrix
+{
+    /// <summary>
+    /// Walks through the combinations obtained by picking one row from each column
+    /// of a matrix. One counter per column tracks the row to use next; the counter of
+    /// the first column advances fastest.
+    /// </summary>
+    public class MatrixPermutationIterator
+    {
+        private readonly Matrix _enclosingInstance;
+
+        // Row index to use next for each column; sized once, at construction time.
+        private readonly int[] _columnRowCounters;
+
+        /// <summary>
+        /// Creates an iterator over the columns the matrix has at this moment.
+        /// </summary>
+        /// <param name="enclosingInstance">matrix whose rows are combined</param>
+        public MatrixPermutationIterator(Matrix enclosingInstance)
+        {
+            _enclosingInstance = enclosingInstance;
+            _columnRowCounters = new int[_enclosingInstance.Columns.Count];
+        }
+
+        /// <summary>
+        /// Tells whether another combination is available.
+        /// </summary>
+        /// <returns>
+        /// false when the iterator was created on a matrix without columns, when the matrix
+        /// now has fewer columns than at construction time, or when the counter of the last
+        /// column has moved past its final row; true otherwise
+        /// </returns>
+        public bool HasNext()
+        {
+            var s = _columnRowCounters.Length;
+            var n = _enclosingInstance.Columns.Count;
+            return s != 0 && n >= s && _columnRowCounters[s - 1] < _enclosingInstance.Columns[s - 1].Rows.Count;
+        }
+
+        /// <summary>
+        /// Returns the current combination and advances to the following one.
+        /// </summary>
+        /// <returns>an array with one row per column, in column order</returns>
+        /// <exception cref="InvalidOperationException">if <see cref="HasNext"/> is false</exception>
+        public Row[] Next()
+        {
+            // InvalidOperationException instead of the bare Exception base type: callers can
+            // tell exhaustion apart from unrelated failures, and an existing
+            // catch (Exception) still handles it.
+            if (!HasNext())
+                throw new InvalidOperationException("no more elements");
+
+            var rows = new Row[_columnRowCounters.Length];
+
+            for (int i = 0; i < _columnRowCounters.Length; i++)
+            {
+                rows[i] = _enclosingInstance.Columns[i].Rows[_columnRowCounters[i]];
+            }
+
+            IncrementColumnRowCounters();
+
+            return rows;
+        }
+
+        /// <summary>
+        /// Advances the counters like an odometer: a counter that reaches the row count of
+        /// its column wraps to 0 and carries into the next column. The last column never
+        /// wraps, which is what lets <see cref="HasNext"/> detect exhaustion.
+        /// </summary>
+        private void IncrementColumnRowCounters()
+        {
+            for (int i = 0; i < _columnRowCounters.Length; i++)
+            {
+                _columnRowCounters[i]++;
+
+                bool pastFinalRow = _columnRowCounters[i] == _enclosingInstance.Columns[i].Rows.Count;
+                bool isLastColumn = i >= _columnRowCounters.Length - 1;
+
+                if (!pastFinalRow || isLastColumn)
+                    break;
+
+                _columnRowCounters[i] = 0;
+            }
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Row.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Row.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Row.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/Matrix/Row.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analyzers.Shingle.Matrix
+{
+    /// <summary>
+    /// A row of a matrix column: an ordered list of tokens. A row registers itself
+    /// with its column when it is constructed.
+    /// </summary>
+    public class Row
+    {
+        /// <summary>
+        /// Creates a row without tokens and appends it to <paramref name="enclosingInstance"/>.
+        /// </summary>
+        /// <param name="enclosingInstance">column the row is appended to</param>
+        public Row(Column enclosingInstance)
+        {
+            Tokens = new LinkedList<Token>();
+            Column = enclosingInstance;
+            Column.Rows.Add(this);
+        }
+
+        /// <summary>
+        /// The column this row was added to.
+        /// </summary>
+        public Column Column { get; private set; }
+
+        /// <summary>
+        /// Position of this row in its column, looked up with a linear search.
+        /// </summary>
+        public int Index
+        {
+            get { return Column.Rows.IndexOf(this); }
+        }
+
+        /// <summary>
+        /// The tokens of this row, in order.
+        /// </summary>
+        public LinkedList<Token> Tokens { get; set; }
+
+        /// <summary>
+        /// Renders the index and every token of the row. The tokens are joined explicitly
+        /// because concatenating a LinkedList only yields its type name.
+        /// </summary>
+        public override string ToString()
+        {
+            string tokensText;
+
+            if (Tokens == null)
+            {
+                // Tokens has a public setter, so it may have been replaced by null.
+                tokensText = "null";
+            }
+            else
+            {
+                var tokenTexts = new List<string>(Tokens.Count);
+                foreach (Token token in Tokens)
+                {
+                    tokenTexts.Add(token == null ? "null" : token.ToString());
+                }
+                tokensText = "[" + string.Join(", ", tokenTexts.ToArray()) + "]";
+            }
+
+            return "Row{" +
+                   "index=" + Index +
+                   ", tokens=" + tokensText +
+                   '}';
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+
+namespace Lucene.Net.Analyzers.Shingle
+{
+    /// <summary>
+    /// An Analyzer that wraps a ShingleFilter around the token stream of another Analyzer.
+    ///
+    /// <p>A shingle is another name for a token based n-gram.</p>
+    /// </summary>
+    public class ShingleAnalyzerWrapper : Analyzer
+    {
+        protected Analyzer DefaultAnalyzer;
+        protected int MaxShingleSize = 2;
+        protected bool OutputUnigrams = true;
+
+        /// <summary>
+        /// Wraps a StandardAnalyzer.
+        /// </summary>
+        public ShingleAnalyzerWrapper()
+            : this(new StandardAnalyzer())
+        {
+        }
+
+        /// <summary>
+        /// Wraps a StandardAnalyzer and uses the given maximum shingle size.
+        /// </summary>
+        /// <param name="nGramSize">max shingle size</param>
+        public ShingleAnalyzerWrapper(int nGramSize)
+            : this()
+        {
+            MaxShingleSize = nGramSize;
+        }
+
+        /// <summary>
+        /// Wraps the given analyzer.
+        /// </summary>
+        /// <param name="defaultAnalyzer">analyzer whose token streams are shingled</param>
+        public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer)
+        {
+            DefaultAnalyzer = defaultAnalyzer;
+            SetOverridesTokenStreamMethod(typeof (ShingleAnalyzerWrapper));
+        }
+
+        /// <summary>
+        /// Wraps the given analyzer and uses the given maximum shingle size.
+        /// </summary>
+        /// <param name="defaultAnalyzer">analyzer whose token streams are shingled</param>
+        /// <param name="maxShingleSize">max shingle size</param>
+        public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize)
+            : this(defaultAnalyzer)
+        {
+            MaxShingleSize = maxShingleSize;
+        }
+
+        /// <summary>
+        /// The max shingle (ngram) size
+        /// </summary>
+        /// <returns>the configured maximum shingle size</returns>
+        public int GetMaxShingleSize()
+        {
+            return MaxShingleSize;
+        }
+
+        /// <summary>
+        /// Set the maximum size of output shingles
+        /// </summary>
+        /// <param name="maxShingleSize">max shingle size</param>
+        public void SetMaxShingleSize(int maxShingleSize)
+        {
+            MaxShingleSize = maxShingleSize;
+        }
+
+        /// <summary>
+        /// Tells whether the original tokens (the "unigrams") are passed to the output stream.
+        /// </summary>
+        /// <returns>the configured output-unigrams flag</returns>
+        public bool IsOutputUnigrams()
+        {
+            return OutputUnigrams;
+        }
+
+        /// <summary>
+        /// Shall the filter pass the original tokens (the "unigrams") to the output
+        /// stream?
+        /// </summary>
+        /// <param name="outputUnigrams">Whether or not the filter shall pass the original tokens to the output stream</param>
+        public void SetOutputUnigrams(bool outputUnigrams)
+        {
+            OutputUnigrams = outputUnigrams;
+        }
+
+        /// <summary>
+        /// Builds a new ShingleFilter around a stream of the wrapped analyzer. The reusable
+        /// stream of the wrapped analyzer is preferred; if obtaining it raises an
+        /// IOException, a stream from its TokenStream method is used instead.
+        /// </summary>
+        /// <param name="fieldName">field name handed to the wrapped analyzer</param>
+        /// <param name="reader">reader handed to the wrapped analyzer</param>
+        /// <returns>a ShingleFilter carrying the current shingle settings</returns>
+        public override TokenStream TokenStream(String fieldName, TextReader reader)
+        {
+            TokenStream source;
+
+            try
+            {
+                source = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+            }
+            catch (IOException)
+            {
+                source = DefaultAnalyzer.TokenStream(fieldName, reader);
+            }
+
+            return ApplySettings(new ShingleFilter(source));
+        }
+
+        /// <summary>
+        /// Returns a ShingleFilter that is kept between calls as the previous token stream.
+        /// The saved filter is reset when the wrapped analyzer hands back the same stream
+        /// as last time, and rebuilt around the new stream otherwise.
+        /// </summary>
+        /// <param name="fieldName">field name handed to the wrapped analyzer</param>
+        /// <param name="reader">reader handed to the wrapped analyzer</param>
+        /// <returns>a ShingleFilter carrying the current shingle settings</returns>
+        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+        {
+            // LUCENE-1678: force fallback to tokenStream() if we
+            // have been subclassed and that subclass overrides
+            // tokenStream but not reusableTokenStream
+            if (overridesTokenStreamMethod)
+            {
+                return TokenStream(fieldName, reader);
+            }
+
+            var saved = (SavedStreams) GetPreviousTokenStream();
+            TokenStream source = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+
+            if (saved == null)
+            {
+                // first use: build the pair and remember it
+                saved = new SavedStreams();
+                saved.Wrapped = source;
+                saved.Shingle = new ShingleFilter(source);
+                SetPreviousTokenStream(saved);
+            }
+            else if (source == saved.Wrapped)
+            {
+                // the wrapped analyzer reused the stream
+                saved.Shingle.Reset();
+            }
+            else
+            {
+                // the wrapped analyzer did not, create a new shingle around the new one
+                saved.Wrapped = source;
+                saved.Shingle = new ShingleFilter(source);
+            }
+
+            return ApplySettings(saved.Shingle);
+        }
+
+        /// <summary>
+        /// Copies the current shingle settings of this analyzer onto the filter.
+        /// </summary>
+        /// <param name="filter">filter to configure</param>
+        /// <returns>the same filter instance</returns>
+        private ShingleFilter ApplySettings(ShingleFilter filter)
+        {
+            filter.SetMaxShingleSize(MaxShingleSize);
+            filter.SetOutputUnigrams(OutputUnigrams);
+            return filter;
+        }
+
+        #region Nested type: SavedStreams
+
+        /// <summary>
+        /// The wrapped stream and the filter built around it, kept for reuse.
+        /// </summary>
+        private class SavedStreams
+        {
+            public ShingleFilter Shingle;
+            public TokenStream Wrapped;
+        }
+
+        #endregion
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleFilter.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,384 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analyzers.Shingle
+{
+ /// <summary>
+ /// <p>A ShingleFilter constructs shingles (token n-grams) from a token stream.
+ /// In other words, it creates combinations of tokens as a single token.</p>
+ ///
+ /// <p>For example, the sentence "please divide this sentence into shingles"
+ /// might be tokenized into shingles "please divide", "divide this",
+ /// "this sentence", "sentence into", and "into shingles".</p>
+ ///
+ /// <p>This filter handles position increments > 1 by inserting filler tokens
+ /// (tokens with termtext "_"). It does not handle a position increment of 0. </p>
+ /// </summary>
+ public class ShingleFilter : TokenFilter
+ {
+ /// <summary>
+ /// Filler token for when positionIncrement is more than 1
+ /// </summary>
+ public static readonly char[] FillerToken = {'_'};
+
+ /// <summary>
+ /// Default maximum shingle size is 2.
+ /// </summary>
+ public static readonly int DefaultMaxShingleSize = 2;
+
+ /// <summary>
+ /// The string to use when joining adjacent tokens to form a shingle
+ /// </summary>
+ public static readonly string TokenSeparator = " ";
+ // Attribute instances below are obtained via AddAttribute in the constructor.
+ private readonly OffsetAttribute _offsetAtt;
+ private readonly PositionIncrementAttribute _posIncrAtt;
+
+ private readonly LinkedList<State> _shingleBuf = new LinkedList<State>(); // captured states of the buffered tokens, oldest first
+ private readonly TermAttribute _termAtt;
+ private readonly TypeAttribute _typeAtt;
+ private State _currentToken; // state of the real token, saved by GetNextToken while filler tokens are handed out
+ private int[] _endOffsets; // end offset of each buffered token, in buffer order (filled by FillShingleBuffer)
+ private bool _hasCurrentToken; // true when a token read from the input has not yet been returned by GetNextToken
+
+ /// <summary>
+ /// Maximum shingle size (number of tokens)
+ /// </summary>
+ private int _maxShingleSize;
+
+ private State _nextToken; // oldest buffered token; null means the buffer has to be refilled
+ private int _numFillerTokensToInsert; // filler tokens still to be returned before the pending input token
+
+ /// <summary>
+ /// By default, we output unigrams (individual tokens) as well as shingles (token n-grams).
+ /// </summary>
+ private bool _outputUnigrams = true;
+
+ private int _shingleBufferPosition; // index into _shingles / _endOffsets of the next token to emit
+ private StringBuilder[] _shingles; // _shingles[j] holds up to the first j+1 buffered terms joined by TokenSeparator
+ private String _tokenType = "shingle"; // type assigned to emitted shingle tokens
+
+ /// <summary>
+ /// Constructs a ShingleFilter with the specified maximum shingle size from the TokenStream
+ /// </summary>
+ /// <param name="input">input token stream</param>
+ /// <param name="maxShingleSize">maximum shingle size produced by the filter; validated by SetMaxShingleSize</param>
+ public ShingleFilter(TokenStream input, int maxShingleSize) : base(input)
+ {
+ SetMaxShingleSize(maxShingleSize);
+
+ // ReSharper disable DoNotCallOverridableMethodsInConstructor
+ _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
+ _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
+ _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
+ _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
+ // ReSharper restore DoNotCallOverridableMethodsInConstructor
+ }
+
+ /// <summary>
+ /// Construct a ShingleFilter with the default maximum shingle size (DefaultMaxShingleSize).
+ /// </summary>
+ /// <param name="input">input stream</param>
+ public ShingleFilter(TokenStream input) :
+ this(input, DefaultMaxShingleSize)
+ {
+ }
+
+ /// <summary>
+ /// Construct a ShingleFilter with the default maximum shingle size and the specified token type for shingle tokens.
+ /// </summary>
+ /// <param name="input">input stream</param>
+ /// <param name="tokenType">token type for shingle tokens</param>
+ public ShingleFilter(TokenStream input, String tokenType) :
+ this(input, DefaultMaxShingleSize)
+ {
+ SetTokenType(tokenType);
+ }
+
+ /// <summary>
+ /// Set the type of the shingle tokens produced by this filter. (default: "shingle")
+ /// </summary>
+ /// <param name="tokenType">type to assign to shingle tokens</param>
+ public void SetTokenType(String tokenType)
+ {
+ _tokenType = tokenType;
+ }
+
+ /// <summary>
+ /// Shall the output stream contain the input tokens (unigrams) as well as shingles? (default: true.)
+ /// </summary>
+ /// <param name="outputUnigrams">Whether or not the output stream shall contain the input tokens (unigrams)</param>
+ public void SetOutputUnigrams(bool outputUnigrams)
+ {
+ _outputUnigrams = outputUnigrams;
+ }
+
+ /// <summary>
+ /// Set the max shingle size (default: 2) and allocate one empty shingle builder per size.
+ /// </summary>
+ /// <param name="maxShingleSize">max size of output shingles; an ArgumentException is thrown if it is less than 2</param>
+ public void SetMaxShingleSize(int maxShingleSize)
+ {
+ if (maxShingleSize < 2)
+ throw new ArgumentException("Max shingle size must be >= 2", "maxShingleSize");
+
+ _shingles = new StringBuilder[maxShingleSize];
+
+ for (int i = 0; i < _shingles.Length; i++)
+ {
+ _shingles[i] = new StringBuilder();
+ }
+
+ _maxShingleSize = maxShingleSize;
+ }
+
+ /// <summary>
+ /// Clear the StringBuilders that are used for storing the output shingles.
+ /// </summary>
+ private void ClearShingles()
+ {
+ foreach (StringBuilder t in _shingles)
+ {
+ t.Length = 0;
+ }
+ }
+
+ /// <summary>
+ /// Advances to the next output token: the unigram (when enabled) or a shingle starting at the oldest buffered token.
+ /// </summary>
+ /// <returns>true if a token was produced; false once FillShingleBuffer can no longer supply tokens</returns>
+ public override bool IncrementToken()
+ {
+ while (true)
+ {
+ if (_nextToken == null)
+ {
+ if (!FillShingleBuffer())
+ return false;
+ }
+ // Every token emitted for this window is based on the oldest buffered token.
+ _nextToken = _shingleBuf.First.Value;
+ // Position 0 is the unigram itself; it is emitted first when unigrams are wanted.
+ if (_outputUnigrams)
+ {
+ if (_shingleBufferPosition == 0)
+ {
+ RestoreState(_nextToken);
+ _posIncrAtt.SetPositionIncrement(1);
+ _shingleBufferPosition++;
+ return true;
+ }
+ }
+ else if (_shingleBufferPosition%_maxShingleSize == 0) // unigrams suppressed: skip position 0
+ {
+ _shingleBufferPosition++;
+ }
+ // Positions >= 1 are shingles: the oldest token joined with the tokens that follow it in the buffer.
+ if (_shingleBufferPosition < _shingleBuf.Count)
+ {
+ RestoreState(_nextToken);
+ _typeAtt.SetType(_tokenType);
+ _offsetAtt.SetOffset(_offsetAtt.StartOffset(), _endOffsets[_shingleBufferPosition]); // start of the oldest token, end of the last token in the shingle
+ StringBuilder buf = _shingles[_shingleBufferPosition];
+ int termLength = buf.Length;
+ char[] termBuffer = _termAtt.TermBuffer();
+ if (termBuffer.Length < termLength)
+ termBuffer = _termAtt.ResizeTermBuffer(termLength);
+ buf.CopyTo(0, termBuffer, 0, termLength);
+ _termAtt.SetTermLength(termLength);
+ if ((! _outputUnigrams) && _shingleBufferPosition%_maxShingleSize == 1) // without unigrams the first shingle carries the position increment
+ {
+ _posIncrAtt.SetPositionIncrement(1);
+ }
+ else
+ {
+ _posIncrAtt.SetPositionIncrement(0);
+ }
+ _shingleBufferPosition++;
+ if (_shingleBufferPosition == _shingleBuf.Count) // window exhausted: refill on the next call
+ {
+ _nextToken = null;
+ _shingleBufferPosition = 0;
+ }
+ return true;
+ }
+ // Nothing left to emit for this window: force a refill on the next loop iteration.
+ _nextToken = null;
+ _shingleBufferPosition = 0;
+ }
+ }
+
+ /// <summary>
+ /// <p>
+ /// Get the next token from the input stream and load it into this stream's attributes.
+ /// If we encounter a token with position increment > 1, filler tokens are
+ /// returned first, one per call, before the token itself.
+ /// </p>
+ /// Returns false when the end of the input stream is reached.
+ /// </summary>
+ /// <returns>true if the attributes now hold a token (real or filler); false at end of input stream</returns>
+ private bool GetNextToken()
+ {
+ while (true)
+ {
+ if (_numFillerTokensToInsert > 0)
+ {
+ if (_currentToken == null)
+ {
+ _currentToken = CaptureState(); // remember the real token before it is overwritten by the filler
+ }
+ else
+ {
+ RestoreState(_currentToken);
+ }
+ _numFillerTokensToInsert--;
+ // A filler token occupies no space
+ _offsetAtt.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.StartOffset());
+ _termAtt.SetTermBuffer(FillerToken, 0, FillerToken.Length);
+ return true;
+ }
+ // A token read from the input is pending: restore it if fillers overwrote the attributes, then hand it out.
+ if (_hasCurrentToken)
+ {
+ if (_currentToken != null)
+ {
+ RestoreState(_currentToken);
+ _currentToken = null;
+ }
+ _hasCurrentToken = false;
+ return true;
+ }
+ // Nothing pending: read the next token from the input.
+ if (!input.IncrementToken())
+ return false;
+
+ _hasCurrentToken = true;
+ // A gap in positions is filled with (increment - 1) filler tokens on the following loop iterations / calls.
+ if (_posIncrAtt.GetPositionIncrement() > 1)
+ _numFillerTokensToInsert = _posIncrAtt.GetPositionIncrement() - 1;
+ }
+ }
+
+ /// <summary>
+ /// Fill the token buffer up to the maximum shingle size and rebuild the output shingles and end offsets from it.
+ /// </summary>
+ /// <exception cref="IOException">throws IOException if there's a problem getting the next token</exception>
+ /// <returns>true if the buffer holds at least one token afterwards; false when no tokens are left</returns>
+ private bool FillShingleBuffer()
+ {
+ bool addedToken = false;
+
+ // Try to fill the shingle buffer.
+
+ do
+ {
+ if (!GetNextToken())
+ break;
+
+ _shingleBuf.AddLast(CaptureState());
+ // Keep at most _maxShingleSize tokens: drop the oldest when a new one slides the window.
+ if (_shingleBuf.Count > _maxShingleSize)
+ _shingleBuf.RemoveFirst();
+
+ addedToken = true;
+ } while (_shingleBuf.Count < _maxShingleSize);
+
+ if (_shingleBuf.Count == 0)
+ return false;
+
+
+ // If no new token could be added to the shingle buffer, we have reached
+ // the end of the input stream and have to discard the least recent token.
+
+ if (! addedToken)
+ _shingleBuf.RemoveFirst();
+
+ if (_shingleBuf.Count == 0)
+ return false;
+
+ ClearShingles();
+
+ _endOffsets = new int[_shingleBuf.Count];
+ for (int i = 0; i < _endOffsets.Length; i++)
+ {
+ _endOffsets[i] = 0;
+ }
+ // Token i is appended to shingles i.._shingles.Length-1, so shingle j spans buffered tokens 0..j.
+ int shingleIndex = 0;
+
+ foreach (State state in _shingleBuf)
+ {
+ RestoreState(state);
+
+ for (int j = shingleIndex; j < _shingles.Length; j++)
+ {
+ if (_shingles[j].Length != 0)
+ _shingles[j].Append(TokenSeparator);
+
+ _shingles[j].Append(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
+ }
+
+ _endOffsets[shingleIndex] = _offsetAtt.EndOffset();
+ shingleIndex++;
+ }
+
+ return true;
+ }
+
+ /// <summary>
+ /// Deprecated: Will be removed in Lucene 3.0. This method is sealed, as it should not be overridden.
+ /// Delegates to the backwards compatibility layer.
+ /// </summary>
+ /// <param name="reusableToken">token instance passed through to the base implementation</param>
+ /// <returns>the result of the base implementation</returns>
+ [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+ public override sealed Token Next(Token reusableToken)
+ {
+ return base.Next(reusableToken);
+ }
+
+ /// <summary>
+ /// Deprecated: Will be removed in Lucene 3.0. This method is sealed, as it should not be overridden.
+ /// Delegates to the backwards compatibility layer.
+ /// </summary>
+ /// <returns>the result of the base implementation</returns>
+ [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+ public override sealed Token Next()
+ {
+ return base.Next();
+ }
+ /// <summary>Calls base.Reset() and then discards all token and shingle-position state buffered by this filter.</summary>
+ public override void Reset()
+ {
+ base.Reset();
+
+ _nextToken = null;
+ _shingleBufferPosition = 0;
+ _shingleBuf.Clear();
+ _numFillerTokensToInsert = 0;
+ _currentToken = null;
+ _hasCurrentToken = false;
+ }
+ }
+}
\ No newline at end of file