You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by th...@apache.org on 2011/07/17 04:46:03 UTC

[Lucene.Net] svn commit: r1147514 [3/3] - in /incubator/lucene.net/trunk: src/contrib/Analyzers/ src/contrib/Analyzers/Miscellaneous/ src/contrib/Analyzers/Payloads/ src/contrib/Analyzers/Shingle/ src/contrib/Analyzers/Shingle/Codec/ src/contrib/Analyzers/Shingle/M...

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/ShingleFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/ShingleFilterTest.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/ShingleFilterTest.cs (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/ShingleFilterTest.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,530 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Shingle
+{
+    public class ShingleFilterTests : BaseTokenStreamTestCase
+    {
+        // Base input fixture: the six whitespace-delimited tokens of
+        // "please divide this sentence into shingles" with their start/end
+        // character offsets.
+        public static readonly Token[] TestToken = new[]
+                                                       {
+                                                           CreateToken("please", 0, 6),
+                                                           CreateToken("divide", 7, 13),
+                                                           CreateToken("this", 14, 18),
+                                                           CreateToken("sentence", 19, 27),
+                                                           CreateToken("into", 28, 32),
+                                                           CreateToken("shingles", 33, 39),
+                                                       };
+
+        // Rebuilt in SetUp() before every test because its tokens' position
+        // increments are mutated there (holes at "this" and "into").
+        // NOTE(review): a static, mutable fixture is fragile if tests ever
+        // run concurrently — consider making it an instance field.
+        public static Token[] TestTokenWithHoles;
+
+        // Expected output for a size-2 ShingleFilter over TestToken with
+        // unigrams enabled: unigrams and bigrams interleaved; each shingle
+        // spans the offsets of its first through last constituent token.
+        public static readonly Token[] BiGramTokens = new[]
+                                                          {
+                                                              CreateToken("please", 0, 6),
+                                                              CreateToken("please divide", 0, 13),
+                                                              CreateToken("divide", 7, 13),
+                                                              CreateToken("divide this", 7, 18),
+                                                              CreateToken("this", 14, 18),
+                                                              CreateToken("this sentence", 14, 27),
+                                                              CreateToken("sentence", 19, 27),
+                                                              CreateToken("sentence into", 19, 32),
+                                                              CreateToken("into", 28, 32),
+                                                              CreateToken("into shingles", 28, 39),
+                                                              CreateToken("shingles", 33, 39),
+                                                          };
+
+        // Each shingle sits at the same position as the unigram it starts
+        // with, hence the alternating 1/0 increments.
+        public static readonly int[] BiGramPositionIncrements = new[]
+                                                                    {
+                                                                        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+                                                                    };
+
+        public static readonly String[] BiGramTypes = new[]
+                                                          {
+                                                              "word", "shingle", "word", "shingle", "word", "shingle",
+                                                              "word",
+                                                              "shingle", "word", "shingle", "word"
+                                                          };
+
+        // Expected output over TestTokenWithHoles: removed tokens are
+        // represented by the "_" filler token in the shingles.
+        public static readonly Token[] BiGramTokensWithHoles = new[]
+                                                                   {
+                                                                       CreateToken("please", 0, 6),
+                                                                       CreateToken("please divide", 0, 13),
+                                                                       CreateToken("divide", 7, 13),
+                                                                       CreateToken("divide _", 7, 19),
+                                                                       CreateToken("_", 19, 19),
+                                                                       CreateToken("_ sentence", 19, 27),
+                                                                       CreateToken("sentence", 19, 27),
+                                                                       CreateToken("sentence _", 19, 33),
+                                                                       CreateToken("_", 33, 33),
+                                                                       CreateToken("_ shingles", 33, 39),
+                                                                       CreateToken("shingles", 33, 39),
+                                                                   };
+
+        public static readonly int[] BiGramPositionIncrementsWithHoles = new[]
+                                                                             {
+                                                                                 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+                                                                             };
+
+        // Expected output with SetOutputUnigrams(false): bigrams only.
+        public static readonly Token[] BiGramTokensWithoutUnigrams = new[]
+                                                                         {
+                                                                             CreateToken("please divide", 0, 13),
+                                                                             CreateToken("divide this", 7, 18),
+                                                                             CreateToken("this sentence", 14, 27),
+                                                                             CreateToken("sentence into", 19, 32),
+                                                                             CreateToken("into shingles", 28, 39),
+                                                                         };
+
+        public static readonly int[] BiGramPositionIncrementsWithoutUnigrams = new[]
+                                                                                   {
+                                                                                       1, 1, 1, 1, 1
+                                                                                   };
+
+        public static readonly String[] BiGramTypesWithoutUnigrams = new[]
+                                                                         {
+                                                                             "shingle", "shingle", "shingle",
+                                                                             "shingle", "shingle"
+                                                                         };
+
+        public static readonly Token[] BiGramTokensWithHolesWithoutUnigrams = new[]
+                                                                                  {
+                                                                                      CreateToken(
+                                                                                          "please divide", 0, 13),
+                                                                                      CreateToken("divide _", 7,
+                                                                                                  19),
+                                                                                      CreateToken("_ sentence", 19,
+                                                                                                  27),
+                                                                                      CreateToken("sentence _", 19,
+                                                                                                  33),
+                                                                                      CreateToken("_ shingles", 33,
+                                                                                                  39),
+                                                                                  };
+
+        // NOTE(review): this array has 6 entries but the matching token
+        // array above has only 5; the comparison loop in ShingleFilterTest
+        // is bounded by the token array, so the last entry is never read.
+        // The extra element looks like a copy/paste slip — confirm against
+        // the upstream Java test and trim to 5 entries.
+        public static readonly int[] BiGramPositionIncrementsWithHolesWithoutUnigrams = new[]
+                                                                                            {
+                                                                                                1, 1, 1, 1, 1, 1
+                                                                                            };
+
+
+        // Degenerate inputs/outputs: a single-token stream (shingling a lone
+        // token yields just that token when unigrams are on, nothing when
+        // they are off) and a fully empty stream.
+        public static readonly Token[] TestSingleToken = new[] { CreateToken("please", 0, 6) };
+
+        public static readonly Token[] SingleToken = new[] { CreateToken("please", 0, 6) };
+
+        public static readonly int[] SingleTokenIncrements = new[] { 1 };
+
+        public static readonly String[] SingleTokenTypes = new[] { "word" };
+
+        public static readonly Token[] EmptyTokenArray = new Token[] { };
+
+        public static readonly int[] EmptyTokenIncrementsArray = new int[] { };
+
+        public static readonly String[] EmptyTokenTypesArray = new String[] { };
+
+        // Expected output for a size-3 ShingleFilter over TestToken:
+        // unigram, bigram and trigram emitted at each starting position
+        // (shorter runs near the end of the stream).
+        public static readonly Token[] TriGramTokens = new[]
+                                                           {
+                                                               CreateToken("please", 0, 6),
+                                                               CreateToken("please divide", 0, 13),
+                                                               CreateToken("please divide this", 0, 18),
+                                                               CreateToken("divide", 7, 13),
+                                                               CreateToken("divide this", 7, 18),
+                                                               CreateToken("divide this sentence", 7, 27),
+                                                               CreateToken("this", 14, 18),
+                                                               CreateToken("this sentence", 14, 27),
+                                                               CreateToken("this sentence into", 14, 32),
+                                                               CreateToken("sentence", 19, 27),
+                                                               CreateToken("sentence into", 19, 32),
+                                                               CreateToken("sentence into shingles", 19, 39),
+                                                               CreateToken("into", 28, 32),
+                                                               CreateToken("into shingles", 28, 39),
+                                                               CreateToken("shingles", 33, 39)
+                                                           };
+
+        public static readonly int[] TriGramPositionIncrements = new[]
+                                                                     {
+                                                                         1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+                                                                     };
+
+        public static readonly String[] TriGramTypes = new[]
+                                                           {
+                                                               "word", "shingle", "shingle",
+                                                               "word", "shingle", "shingle",
+                                                               "word", "shingle", "shingle",
+                                                               "word", "shingle", "shingle",
+                                                               "word", "shingle",
+                                                               "word"
+                                                           };
+
+        public static readonly Token[] TriGramTokensWithoutUnigrams = new[]
+                                                                          {
+                                                                              CreateToken("please divide", 0, 13),
+                                                                              CreateToken("please divide this", 0,
+                                                                                          18),
+                                                                              CreateToken("divide this", 7, 18),
+                                                                              CreateToken("divide this sentence", 7,
+                                                                                          27),
+                                                                              CreateToken("this sentence", 14, 27),
+                                                                              CreateToken("this sentence into", 14,
+                                                                                          32),
+                                                                              CreateToken("sentence into", 19, 32),
+                                                                              CreateToken("sentence into shingles",
+                                                                                          19, 39),
+                                                                              CreateToken("into shingles", 28, 39),
+                                                                          };
+
+        public static readonly int[] TriGramPositionIncrementsWithoutUnigrams = new[]
+                                                                                    {
+                                                                                        1, 0, 1, 0, 1, 0, 1, 0, 1
+                                                                                    };
+
+        public static readonly String[] TriGramTypesWithoutUnigrams = new[]
+                                                                          {
+                                                                              "shingle", "shingle",
+                                                                              "shingle", "shingle",
+                                                                              "shingle", "shingle",
+                                                                              "shingle", "shingle",
+                                                                              "shingle",
+                                                                          };
+
+        // Expected output for a size-4 ShingleFilter over TestToken.
+        public static readonly Token[] FourGramTokens = new[]
+                                                            {
+                                                                CreateToken("please", 0, 6),
+                                                                CreateToken("please divide", 0, 13),
+                                                                CreateToken("please divide this", 0, 18),
+                                                                CreateToken("please divide this sentence", 0, 27),
+                                                                CreateToken("divide", 7, 13),
+                                                                CreateToken("divide this", 7, 18),
+                                                                CreateToken("divide this sentence", 7, 27),
+                                                                CreateToken("divide this sentence into", 7, 32),
+                                                                CreateToken("this", 14, 18),
+                                                                CreateToken("this sentence", 14, 27),
+                                                                CreateToken("this sentence into", 14, 32),
+                                                                CreateToken("this sentence into shingles", 14, 39),
+                                                                CreateToken("sentence", 19, 27),
+                                                                CreateToken("sentence into", 19, 32),
+                                                                CreateToken("sentence into shingles", 19, 39),
+                                                                CreateToken("into", 28, 32),
+                                                                CreateToken("into shingles", 28, 39),
+                                                                CreateToken("shingles", 33, 39)
+                                                            };
+
+        public static readonly int[] FourGramPositionIncrements = new[]
+                                                                      {
+                                                                          1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0
+                                                                          , 1, 0, 1
+                                                                      };
+
+        public static readonly String[] FourGramTypes = new[]
+                                                            {
+                                                                "word", "shingle", "shingle", "shingle",
+                                                                "word", "shingle", "shingle", "shingle",
+                                                                "word", "shingle", "shingle", "shingle",
+                                                                "word", "shingle", "shingle",
+                                                                "word", "shingle",
+                                                                "word"
+                                                            };
+
+        public static readonly Token[] FourGramTokensWithoutUnigrams = new[]
+                                                                           {
+                                                                               CreateToken("please divide", 0, 13),
+                                                                               CreateToken("please divide this", 0,
+                                                                                           18),
+                                                                               CreateToken(
+                                                                                   "please divide this sentence", 0,
+                                                                                   27),
+                                                                               CreateToken("divide this", 7, 18),
+                                                                               CreateToken("divide this sentence", 7,
+                                                                                           27),
+                                                                               CreateToken(
+                                                                                   "divide this sentence into", 7,
+                                                                                   32),
+                                                                               CreateToken("this sentence", 14, 27),
+                                                                               CreateToken("this sentence into", 14,
+                                                                                           32),
+                                                                               CreateToken(
+                                                                                   "this sentence into shingles", 14,
+                                                                                   39),
+                                                                               CreateToken("sentence into", 19, 32),
+                                                                               CreateToken(
+                                                                                   "sentence into shingles", 19, 39)
+                                                                               ,
+                                                                               CreateToken("into shingles", 28, 39),
+                                                                           };
+
+        public static readonly int[] FourGramPositionIncrementsWithoutUnigrams = new[]
+                                                                                     {
+                                                                                         1, 0, 0, 1, 0, 0, 1, 0, 0,
+                                                                                         1, 0, 1
+                                                                                     };
+
+        public static readonly String[] FourGramTypesWithoutUnigrams = new[]
+                                                                           {
+                                                                               "shingle", "shingle",
+                                                                               "shingle", "shingle",
+                                                                               "shingle", "shingle",
+                                                                               "shingle", "shingle",
+                                                                               "shingle", "shingle",
+                                                                               "shingle", "shingle",
+                                                                           };
+
+        /// <summary>
+        /// Builds a Token with the given term text and character offsets.
+        /// NOTE(review): the third parameter is the END offset, despite
+        /// being named "offset" — a rename would make call sites clearer.
+        /// </summary>
+        private static Token CreateToken(String term, int start, int offset)
+        {
+            var token = new Token(start, offset);
+            token.SetTermBuffer(term);
+            return token;
+        }
+
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            // Rebuild this fixture for every test: the two SetPositionIncrement
+            // calls below mutate the tokens, and the tokens themselves may be
+            // consumed/altered by the filter under test, so sharing a single
+            // static instance across tests would leak state between them.
+            TestTokenWithHoles = new[]
+                                     {
+                                         CreateToken("please", 0, 6),
+                                         CreateToken("divide", 7, 13),
+                                         CreateToken("sentence", 19, 27),
+                                         CreateToken("shingles", 33, 39),
+                                     };
+
+            // Increment of 2 simulates a removed token (a "hole") before
+            // "sentence" and before "shingles".
+            TestTokenWithHoles[2].SetPositionIncrement(2);
+            TestTokenWithHoles[3].SetPositionIncrement(2);
+        }
+
+
+        /// <summary>
+        /// Tests ShingleFilter(TokenStream, 2) with unigram output enabled.
+        /// Each test below drives the shared ShingleFilterTest helper with a
+        /// (shingle size, input, expected tokens/increments/types) tuple.
+        /// </summary>
+        [Test]
+        public void TestBiGramFilter()
+        {
+            ShingleFilterTest(2, TestToken, BiGramTokens,
+                              BiGramPositionIncrements, BiGramTypes,
+                              true);
+        }
+
+        // Holes (position increments > 1) must surface as "_" filler tokens.
+        [Test]
+        public void TestBiGramFilterWithHoles()
+        {
+            ShingleFilterTest(2, TestTokenWithHoles, BiGramTokensWithHoles,
+                              BiGramPositionIncrements, BiGramTypes,
+                              true);
+        }
+
+        [Test]
+        public void TestBiGramFilterWithoutUnigrams()
+        {
+            ShingleFilterTest(2, TestToken, BiGramTokensWithoutUnigrams,
+                              BiGramPositionIncrementsWithoutUnigrams, BiGramTypesWithoutUnigrams,
+                              false);
+        }
+
+        [Test]
+        public void TestBiGramFilterWithHolesWithoutUnigrams()
+        {
+            ShingleFilterTest(2, TestTokenWithHoles, BiGramTokensWithHolesWithoutUnigrams,
+                              BiGramPositionIncrementsWithHolesWithoutUnigrams, BiGramTypesWithoutUnigrams,
+                              false);
+        }
+
+        // A lone token cannot form a shingle: with unigrams on, it passes
+        // through; with unigrams off, the output is empty.
+        [Test]
+        public void TestBiGramFilterWithSingleToken()
+        {
+            ShingleFilterTest(2, TestSingleToken, SingleToken,
+                              SingleTokenIncrements, SingleTokenTypes,
+                              true);
+        }
+
+        [Test]
+        public void TestBiGramFilterWithSingleTokenWithoutUnigrams()
+        {
+            ShingleFilterTest(2, TestSingleToken, EmptyTokenArray,
+                              EmptyTokenIncrementsArray, EmptyTokenTypesArray,
+                              false);
+        }
+
+        [Test]
+        public void TestBiGramFilterWithEmptyTokenStream()
+        {
+            ShingleFilterTest(2, EmptyTokenArray, EmptyTokenArray,
+                              EmptyTokenIncrementsArray, EmptyTokenTypesArray,
+                              true);
+        }
+
+        [Test]
+        public void TestBiGramFilterWithEmptyTokenStreamWithoutUnigrams()
+        {
+            ShingleFilterTest(2, EmptyTokenArray, EmptyTokenArray,
+                              EmptyTokenIncrementsArray, EmptyTokenTypesArray,
+                              false);
+        }
+
+        [Test]
+        public void TestTriGramFilter()
+        {
+            ShingleFilterTest(3, TestToken, TriGramTokens,
+                              TriGramPositionIncrements, TriGramTypes,
+                              true);
+        }
+
+        [Test]
+        public void TestTriGramFilterWithoutUnigrams()
+        {
+            ShingleFilterTest(3, TestToken, TriGramTokensWithoutUnigrams,
+                              TriGramPositionIncrementsWithoutUnigrams, TriGramTypesWithoutUnigrams,
+                              false);
+        }
+
+        [Test]
+        public void TestFourGramFilter()
+        {
+            ShingleFilterTest(4, TestToken, FourGramTokens,
+                              FourGramPositionIncrements, FourGramTypes,
+                              true);
+        }
+
+        [Test]
+        public void TestFourGramFilterWithoutUnigrams()
+        {
+            ShingleFilterTest(4, TestToken, FourGramTokensWithoutUnigrams,
+                              FourGramPositionIncrementsWithoutUnigrams,
+                              FourGramTypesWithoutUnigrams, false);
+        }
+
+        // Verifies the filter chain is reusable: after consuming the stream
+        // once, resetting the upstream tokenizer with fresh input must yield
+        // the exact same tokens again (same terms, offsets, types and
+        // position increments).
+        [Test]
+        public void TestReset()
+        {
+            Tokenizer wsTokenizer = new WhitespaceTokenizer(new StringReader("please divide this sentence"));
+            TokenStream filter = new ShingleFilter(wsTokenizer, 2);
+
+            AssertTokenStreamContents(filter,
+                                      new[]
+                                          {
+                                              "please", "please divide", "divide", "divide this", "this",
+                                              "this sentence",
+                                              "sentence"
+                                          },
+                                      new[] {0, 0, 7, 7, 14, 14, 19}, new[] {6, 13, 13, 18, 18, 27, 27},
+                                      new[]
+                                          {
+                                              TypeAttributeImpl.DEFAULT_TYPE, "shingle", TypeAttributeImpl.DEFAULT_TYPE,
+                                              "shingle", TypeAttributeImpl.DEFAULT_TYPE, "shingle",
+                                              TypeAttributeImpl.DEFAULT_TYPE
+                                          },
+                                      new[] {1, 0, 1, 0, 1, 0, 1}
+                );
+
+            // Rewind only the tokenizer; the ShingleFilter is expected to be
+            // consumable again without being re-created.
+            wsTokenizer.Reset(new StringReader("please divide this sentence"));
+
+            AssertTokenStreamContents(filter,
+                                      new[]
+                                          {
+                                              "please", "please divide", "divide", "divide this", "this",
+                                              "this sentence",
+                                              "sentence"
+                                          },
+                                      new[] {0, 0, 7, 7, 14, 14, 19}, new[] {6, 13, 13, 18, 18, 27, 27},
+                                      new[]
+                                          {
+                                              TypeAttributeImpl.DEFAULT_TYPE, "shingle", TypeAttributeImpl.DEFAULT_TYPE,
+                                              "shingle", TypeAttributeImpl.DEFAULT_TYPE, "shingle",
+                                              TypeAttributeImpl.DEFAULT_TYPE
+                                          },
+                                      new[] {1, 0, 1, 0, 1, 0, 1}
+                );
+        }
+
+        /// <summary>
+        /// Shared driver: runs a ShingleFilter of the given max shingle size
+        /// over <paramref name="tokensToShingle"/> and asserts that every
+        /// emitted token matches <paramref name="tokensToCompare"/> in term
+        /// text, offsets, position increment and type, and that the token
+        /// counts agree.
+        /// </summary>
+        protected void ShingleFilterTest(int maxSize, Token[] tokensToShingle, Token[] tokensToCompare,
+                                         int[] positionIncrements, String[] types, bool outputUnigrams)
+        {
+            var filter = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize);
+            filter.SetOutputUnigrams(outputUnigrams);
+
+            // AddAttribute returns the filter's existing attribute instances,
+            // so these references observe each token as it is produced.
+            var termAtt = (TermAttribute) filter.AddAttribute(typeof (TermAttribute));
+            var offsetAtt = (OffsetAttribute) filter.AddAttribute(typeof (OffsetAttribute));
+            var posIncrAtt = (PositionIncrementAttribute) filter.AddAttribute(typeof (PositionIncrementAttribute));
+            var typeAtt = (TypeAttribute) filter.AddAttribute(typeof (TypeAttribute));
+
+            int i = 0;
+            while (filter.IncrementToken())
+            {
+                // Guard first so an over-producing filter fails cleanly
+                // instead of throwing IndexOutOfRangeException below.
+                Assert.IsTrue(i < tokensToCompare.Length, "ShingleFilter outputted more tokens than expected");
+
+                String termText = termAtt.Term();
+                String goldText = tokensToCompare[i].Term();
+
+                Assert.AreEqual(goldText, termText, "Wrong termText");
+                Assert.AreEqual(tokensToCompare[i].StartOffset(), offsetAtt.StartOffset(),
+                                "Wrong startOffset for token \"" + termText + "\"");
+                Assert.AreEqual(tokensToCompare[i].EndOffset(), offsetAtt.EndOffset(),
+                                "Wrong endOffset for token \"" + termText + "\"");
+                Assert.AreEqual(positionIncrements[i], posIncrAtt.GetPositionIncrement(),
+                                "Wrong positionIncrement for token \"" + termText + "\"");
+                Assert.AreEqual(types[i], typeAtt.Type(), "Wrong type for token \"" + termText + "\"");
+
+                i++;
+            }
+
+            // Also catches the filter producing FEWER tokens than expected.
+            Assert.AreEqual(tokensToCompare.Length, i,
+                            "ShingleFilter outputted wrong # of tokens. (# output = " + i + "; # expected =" +
+                            tokensToCompare.Length + ")");
+        }
+
+        #region Nested type: TestTokenStream
+
+        /// <summary>
+        /// Minimal TokenStream that replays a fixed Token array, copying each
+        /// token's term, offsets and position increment into this stream's
+        /// attributes. The type attribute is always reset to the default
+        /// ("word"), regardless of the source token's type.
+        /// </summary>
+        public sealed class TestTokenStream : TokenStream
+        {
+            private readonly OffsetAttribute _offsetAtt;
+            private readonly PositionIncrementAttribute _posIncrAtt;
+            private readonly TermAttribute _termAtt;
+            private readonly Token[] _testToken;
+            private readonly TypeAttribute _typeAtt;
+            // Index of the next token to emit; never reset, so the stream is
+            // single-use (sufficient for these tests).
+            private int _index;
+
+            public TestTokenStream(Token[] testToken)
+            {
+                _testToken = testToken;
+
+                _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
+                _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
+                _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
+                _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
+            }
+
+            public override bool IncrementToken()
+            {
+                // Clear before the exhaustion check, per the TokenStream
+                // contract that attributes are reset on every call.
+                ClearAttributes();
+
+                if (_index >= _testToken.Length)
+                    return false;
+
+                Token t = _testToken[_index++];
+
+                _termAtt.SetTermBuffer(t.TermBuffer(), 0, t.TermLength());
+                _offsetAtt.SetOffset(t.StartOffset(), t.EndOffset());
+                _posIncrAtt.SetPositionIncrement(t.GetPositionIncrement());
+                _typeAtt.SetType(TypeAttributeImpl.DEFAULT_TYPE);
+
+                return true;
+            }
+        }
+
+        #endregion
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs?rev=1147514&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs Sun Jul 17 02:46:00 2011
@@ -0,0 +1,594 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analyzers.Miscellaneous;
+using Lucene.Net.Analyzers.Payloads;
+using Lucene.Net.Analyzers.Shingle.Codec;
+using Lucene.Net.Analyzers.Shingle.Matrix;
+using NUnit.Framework;
+using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
+
+namespace Lucene.Net.Analyzers.Shingle
+{
+    public class TestShingleMatrixFilter : BaseTokenStreamTestCase
+    {
        /// <summary>
        /// Default constructor; delegates to the named constructor with this type's name.
        /// </summary>
        public TestShingleMatrixFilter() : this(typeof (TestShingleMatrixFilter).Name)
        {
        }

        // use this ctor, because SingleTokenTokenStream only uses next(Token), so exclude it
        // NOTE(review): the map passed to the base class appears to list test methods to
        // exclude from the base class's new-TokenStream-API checks — confirm against
        // BaseTokenStreamTestCase before changing.
        public TestShingleMatrixFilter(String name) :
            base(
            name,
            new Hashtable(
                new Dictionary<string, string[]>
                    {
                        {
                            "TestShingleMatrixFilter",
                            new[]
                                {
                                    "testBehavingAsShingleFilter",
                                    "testMatrix",
                                    "testIterator"
                                }
                            }
                    }
                ))
        {
        }
+
        /// <summary>
        /// Verifies that a 2-2 shingle over five terms yields exactly four bi-grams and
        /// that exhausting the stream is idempotent (regression test for LUCENE-1939).
        /// </summary>
        [Test]
        public void TestIterator()
        {
            var wst = new WhitespaceTokenizer(new StringReader("one two three four five"));
            var smf = new ShingleMatrixFilter(wst, 2, 2, '_', false,
                                              new OneDimensionalNonWeightedTokenSettingsCodec());

            int i;
            for (i = 0; smf.IncrementToken(); i++) { }

            Assert.AreEqual(4, i);

            // call next once more. this should return false again rather than throwing an exception (LUCENE-1939)
            Assert.IsFalse(smf.IncrementToken());
        }
+
        /// <summary>
        /// Verifies that with a one-dimensional non-weighted codec the matrix filter
        /// produces the same uni-gram + bi-gram output as the plain ShingleFilter.
        /// </summary>
        [Test]
        public void TestBehavingAsShingleFilter()
        {
            ShingleMatrixFilter.DefaultSettingsCodec = null;

            // An empty input stream must produce an empty output stream.
            TokenStream ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, ' ', false,
                                                     new OneDimensionalNonWeightedTokenSettingsCodec
                                                         ());
            Assert.IsFalse(ts.IncrementToken());

            // test a plain old token stream with synonyms translated to rows.

            var tokens = new LinkedList<Token>();
            tokens.AddLast(CreateToken("please", 0, 6));
            tokens.AddLast(CreateToken("divide", 7, 13));
            tokens.AddLast(CreateToken("this", 14, 18));
            tokens.AddLast(CreateToken("sentence", 19, 27));
            tokens.AddLast(CreateToken("into", 28, 32));
            tokens.AddLast(CreateToken("shingles", 33, 39));

            var tls = new TokenListStream(tokens);

            // bi-grams

            ts = new ShingleMatrixFilter(tls, 1, 2, ' ', false, new OneDimensionalNonWeightedTokenSettingsCodec());

            AssertTokenStreamContents(ts,
                                      new[]
                                          {
                                              "please", "please divide", "divide", "divide this",
                                              "this", "this sentence", "sentence", "sentence into", "into",
                                              "into shingles", "shingles"
                                          },
                                      new[] {0, 0, 7, 7, 14, 14, 19, 19, 28, 28, 33},
                                      new[] {6, 13, 13, 18, 18, 27, 27, 32, 32, 39, 39});
        }
+
+
        /// <summary>
        /// Extracts a matrix from a token stream and verifies shingle output for
        /// several configurations: 2D synonym rows, no-spacer shingles, weighted
        /// 3D codec with ^/$ boundary tokens, unlimited shingle size, and
        /// multi-token synonyms. Expected weights are payload-encoded norms.
        /// </summary>
        [Test]
        public void TestTokenStream()
        {
            ShingleMatrixFilter.DefaultSettingsCodec = null;

            // test a plain old token stream with synonyms translated to rows.
            // posIncr 0 marks a token as a synonym of the previous one.

            var tokens = new LinkedList<Token>();
            tokens.AddLast(TokenFactory("hello", 1, 0, 4));
            tokens.AddLast(TokenFactory("greetings", 0, 0, 4));
            tokens.AddLast(TokenFactory("world", 1, 5, 10));
            tokens.AddLast(TokenFactory("earth", 0, 5, 10));
            tokens.AddLast(TokenFactory("tellus", 0, 5, 10));

            TokenStream tls = new TokenListStream(tokens);

            // bi-grams

            TokenStream ts = new ShingleMatrixFilter(tls, 2, 2, '_', false,
                                                     new TwoDimensionalNonWeightedSynonymTokenSettingsCodec());

            AssertNext(ts, "hello_world");
            AssertNext(ts, "greetings_world");
            AssertNext(ts, "hello_earth");
            AssertNext(ts, "greetings_earth");
            AssertNext(ts, "hello_tellus");
            AssertNext(ts, "greetings_tellus");
            Assert.IsFalse(ts.IncrementToken());

            // bi-grams with no spacer character, start offset, end offset

            tls.Reset();
            ts = new ShingleMatrixFilter(tls, 2, 2, null, false,
                                         new TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
            AssertNext(ts, "helloworld", 0, 10);
            AssertNext(ts, "greetingsworld", 0, 10);
            AssertNext(ts, "helloearth", 0, 10);
            AssertNext(ts, "greetingsearth", 0, 10);
            AssertNext(ts, "hellotellus", 0, 10);
            AssertNext(ts, "greetingstellus", 0, 10);
            Assert.IsFalse(ts.IncrementToken());


            // add ^_prefix_and_suffix_$
            //
            // using 3d codec as it supports weights

            ShingleMatrixFilter.DefaultSettingsCodec =
                new SimpleThreeDimensionalTokenSettingsCodec();

            tokens = new LinkedList<Token>();
            tokens.AddLast(TokenFactory("hello", 1, 1f, 0, 4, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("greetings", 0, 1f, 0, 4, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("world", 1, 1f, 5, 10, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("earth", 0, 1f, 5, 10, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("tellus", 0, 1f, 5, 10, TokenPositioner.NewRow));

            tls = new TokenListStream(tokens);

            // bi-grams, position increment, weight, start offset, end offset

            ts = new PrefixAndSuffixAwareTokenFilter(
                new SingleTokenTokenStream(TokenFactory("^", 1, 100f, 0, 0)),
                tls,
                new SingleTokenTokenStream(TokenFactory("$", 1, 50f, 0, 0))
                );
            // Cache so the same prefixed/suffixed stream can be replayed below.
            tls = new CachingTokenFilter(ts);

            ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);

            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
            Assert.IsFalse(ts.IncrementToken());

            // test unlimited size and allow single boundary token as shingle
            tls.Reset();

            ts = new ShingleMatrixFilter(tls, 1, Int32.MaxValue, '_', false);

            AssertNext(ts, "^", 1, 10.0f, 0, 0);
            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello", 1, 1.0f, 0, 4);
            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "world", 1, 1.0f, 5, 10);
            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "$", 1, 7.071068f, 10, 10);
            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings", 1, 1.0f, 0, 4);
            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "earth", 1, 1.0f, 5, 10);
            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "tellus", 1, 1.0f, 5, 10);
            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);

            Assert.IsFalse(ts.IncrementToken());

            // test unlimited size but don't allow single boundary token as shingle

            tls.Reset();
            ts = new ShingleMatrixFilter(tls, 1, Int32.MaxValue, '_', true);

            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello", 1, 1.0f, 0, 4);
            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "world", 1, 1.0f, 5, 10);
            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
            AssertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings", 1, 1.0f, 0, 4);
            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "earth", 1, 1.0f, 5, 10);
            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
            AssertNext(ts, "tellus", 1, 1.0f, 5, 10);
            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
            AssertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
            AssertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);


            Assert.IsFalse(ts.IncrementToken());

            // multi-token synonyms
            //
            // Token[][][] {
            //    {{hello}, {greetings, and, salutations},
            //    {{world}, {earth}, {tellus}}
            // }
            //


            tokens = new LinkedList<Token>();
            tokens.AddLast(TokenFactory("hello", 1, 1f, 0, 4, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("greetings", 1, 1f, 0, 4, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("and", 1, 1f, 0, 4, TokenPositioner.SameRow));
            tokens.AddLast(TokenFactory("salutations", 1, 1f, 0, 4, TokenPositioner.SameRow));
            tokens.AddLast(TokenFactory("world", 1, 1f, 5, 10, TokenPositioner.NewColumn));
            tokens.AddLast(TokenFactory("earth", 1, 1f, 5, 10, TokenPositioner.NewRow));
            tokens.AddLast(TokenFactory("tellus", 1, 1f, 5, 10, TokenPositioner.NewRow));

            tls = new TokenListStream(tokens);

            // 2-3 grams

            ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);

            // shingle, position increment, weight, start offset, end offset

            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "greetings_and", 1, 1.4142135f, 0, 4);
            AssertNext(ts, "greetings_and_salutations", 1, 1.7320508f, 0, 4);
            AssertNext(ts, "and_salutations", 1, 1.4142135f, 0, 4);
            AssertNext(ts, "and_salutations_world", 1, 1.7320508f, 0, 10);
            AssertNext(ts, "salutations_world", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "and_salutations_earth", 1, 1.7320508f, 0, 10);
            AssertNext(ts, "salutations_earth", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
            AssertNext(ts, "and_salutations_tellus", 1, 1.7320508f, 0, 10);
            AssertNext(ts, "salutations_tellus", 1, 1.4142135f, 0, 10);

            Assert.IsFalse(ts.IncrementToken());
        }
+
        /// <summary>
        /// Tests creating shingles from a pre-assembled matrix.
        /// 
        /// Tests the row token z-axis, multi token synonyms. 
        /// </summary>
        [Test]
        public void TestMatrix()
        {
            // some other tests set this to null.
            // set it here in case tests are run out of the usual order.
            ShingleMatrixFilter.DefaultSettingsCodec = new SimpleThreeDimensionalTokenSettingsCodec();

            var matrix = new Matrix.Matrix();

            new Column(TokenFactory("no", 1), matrix);
            new Column(TokenFactory("surprise", 1), matrix);
            new Column(TokenFactory("to", 1), matrix);
            new Column(TokenFactory("see", 1), matrix);
            new Column(TokenFactory("england", 1), matrix);
            new Column(TokenFactory("manager", 1), matrix);

            var col = new Column(matrix);

            // sven göran eriksson is a multi token synonym to svennis
            new Row(col).Tokens.AddLast(TokenFactory("svennis", 1));

            var row = new Row(col);
            row.Tokens.AddLast(TokenFactory("sven", 1));
            row.Tokens.AddLast(TokenFactory("göran", 1));
            row.Tokens.AddLast(TokenFactory("eriksson", 1));

            new Column(TokenFactory("in", 1), matrix);
            new Column(TokenFactory("the", 1), matrix);
            new Column(TokenFactory("croud", 1), matrix);

            // 2-4 grams; expected weights are sqrt(token count) encoded via payloads.
            TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true,
                                                     new SimpleThreeDimensionalTokenSettingsCodec());

            AssertNext(ts, "no_surprise", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "no_surprise_to", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "no_surprise_to_see", 1, 2.0f, 0, 0);
            AssertNext(ts, "surprise_to", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "surprise_to_see", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "surprise_to_see_england", 1, 2.0f, 0, 0);
            AssertNext(ts, "to_see", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "to_see_england", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "to_see_england_manager", 1, 2.0f, 0, 0);
            AssertNext(ts, "see_england", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "see_england_manager", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "see_england_manager_svennis", 1, 2.0f, 0, 0);
            AssertNext(ts, "england_manager", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "england_manager_svennis", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "england_manager_svennis_in", 1, 2.0f, 0, 0);
            AssertNext(ts, "manager_svennis", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "manager_svennis_in", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "manager_svennis_in_the", 1, 2.0f, 0, 0);
            AssertNext(ts, "svennis_in", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "svennis_in_the", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "svennis_in_the_croud", 1, 2.0f, 0, 0);
            AssertNext(ts, "in_the", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "in_the_croud", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "the_croud", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "see_england_manager_sven", 1, 2.0f, 0, 0);
            AssertNext(ts, "england_manager_sven", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "england_manager_sven_göran", 1, 2.0f, 0, 0);
            AssertNext(ts, "manager_sven", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "manager_sven_göran", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "manager_sven_göran_eriksson", 1, 2.0f, 0, 0);
            AssertNext(ts, "sven_göran", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "sven_göran_eriksson", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "sven_göran_eriksson_in", 1, 2.0f, 0, 0);
            AssertNext(ts, "göran_eriksson", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "göran_eriksson_in", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "göran_eriksson_in_the", 1, 2.0f, 0, 0);
            AssertNext(ts, "eriksson_in", 1, 1.4142135f, 0, 0);
            AssertNext(ts, "eriksson_in_the", 1, 1.7320508f, 0, 0);
            AssertNext(ts, "eriksson_in_the_croud", 1, 2.0f, 0, 0);

            Assert.IsFalse(ts.IncrementToken());
        }
+
+        private static Token TokenFactory(String text, int posIncr, int startOffset, int endOffset)
+        {
+            var token = new Token(startOffset, endOffset);
+
+            token.SetTermBuffer(text);
+            token.SetPositionIncrement(posIncr);
+
+            return token;
+        }
+        
+        private static Token TokenFactory(String text, int posIncr)
+        {
+            return TokenFactory(text, posIncr, 1f, 0, 0);
+        }
+
+        private static Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset)
+        {
+            var token = new Token(startOffset, endOffset);
+
+            token.SetTermBuffer(text);
+            token.SetPositionIncrement(posIncr);
+
+            ShingleMatrixFilter.DefaultSettingsCodec.SetWeight(token, weight);
+
+            return token;
+        }
+
+        private static Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset,
+                                          TokenPositioner positioner)
+        {
+            var token = new Token(startOffset, endOffset);
+
+            token.SetTermBuffer(text);
+            token.SetPositionIncrement(posIncr);
+
+            ShingleMatrixFilter.DefaultSettingsCodec.SetWeight(token, weight);
+            ShingleMatrixFilter.DefaultSettingsCodec.SetTokenPositioner(token, positioner);
+
+            return token;
+        }
+
+        // assert-methods start here
+
+        private static void AssertNext(TokenStream ts, String text)
+        {
+            var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));
+
+            Assert.IsTrue(ts.IncrementToken());
+            Assert.AreEqual(text, termAtt.Term());
+        }
+
+        private static void AssertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset,
+                                       int endOffset)
+        {
+            var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));
+            var posIncrAtt = (PositionIncrementAttribute) ts.AddAttribute(typeof (PositionIncrementAttribute));
+            var payloadAtt = (PayloadAttribute) ts.AddAttribute(typeof (PayloadAttribute));
+            var offsetAtt = (OffsetAttribute) ts.AddAttribute(typeof (OffsetAttribute));
+
+            Assert.IsTrue(ts.IncrementToken());
+            Assert.AreEqual(text, termAtt.Term());
+            Assert.AreEqual(positionIncrement, posIncrAtt.GetPositionIncrement());
+            Assert.AreEqual(boost,
+                            payloadAtt.GetPayload() == null
+                                ? 1f
+                                : PayloadHelper.DecodeFloat(payloadAtt.GetPayload().GetData()), 0);
+            Assert.AreEqual(startOffset, offsetAtt.StartOffset());
+            Assert.AreEqual(endOffset, offsetAtt.EndOffset());
+        }
+
+        private static void AssertNext(TokenStream ts, String text, int startOffset, int endOffset)
+        {
+            var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));
+            var offsetAtt = (OffsetAttribute) ts.AddAttribute(typeof (OffsetAttribute));
+
+            Assert.IsTrue(ts.IncrementToken());
+            Assert.AreEqual(text, termAtt.Term());
+            Assert.AreEqual(startOffset, offsetAtt.StartOffset());
+            Assert.AreEqual(endOffset, offsetAtt.EndOffset());
+        }
+
+        private static Token CreateToken(String term, int start, int offset)
+        {
+            var token = new Token(start, offset);
+            token.SetTermBuffer(term);
+            return token;
+        }
+
+        #region Nested type: TokenListStream
+
        /// <summary>
        /// A TokenStream that replays a fixed collection of tokens, copying each
        /// prototype token's state into the stream's attributes. Reset() rewinds
        /// to the beginning so the same fixture can be consumed multiple times.
        /// </summary>
        public sealed class TokenListStream : TokenStream
        {
            private readonly FlagsAttribute _flagsAtt;
            private readonly OffsetAttribute _offsetAtt;
            private readonly PayloadAttribute _payloadAtt;
            private readonly PositionIncrementAttribute _posIncrAtt;
            private readonly TermAttribute _termAtt;
            private readonly ICollection<Token> _tokens;
            private readonly TypeAttribute _typeAtt;

            // Lazily created on first IncrementToken(); null means "not started"
            // (or rewound by Reset()).
            private IEnumerator<Token> _iterator;

            public TokenListStream(ICollection<Token> tokens)
            {
                _tokens = tokens;
                _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
                _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
                _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
                _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
                _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
                _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
            }

            /// <summary>
            /// Advances to the next fixture token and copies its full state
            /// (term, position increment, flags, offsets, type, payload) into
            /// this stream's attributes.
            /// </summary>
            /// <returns>False when the fixture collection is exhausted.</returns>
            public override bool IncrementToken()
            {
                if (_iterator == null)
                    _iterator = _tokens.GetEnumerator();

                if (!_iterator.MoveNext())
                    return false;

                Token prototype = _iterator.Current;

                ClearAttributes();

                _termAtt.SetTermBuffer(prototype.TermBuffer(), 0, prototype.TermLength());
                _posIncrAtt.SetPositionIncrement(prototype.GetPositionIncrement());
                _flagsAtt.SetFlags(prototype.GetFlags());
                _offsetAtt.SetOffset(prototype.StartOffset(), prototype.EndOffset());
                _typeAtt.SetType(prototype.Type());
                _payloadAtt.SetPayload(prototype.GetPayload());

                return true;
            }


            /// <summary>
            /// Rewinds the stream; the next IncrementToken() starts over from the
            /// first fixture token.
            /// </summary>
            public override void Reset()
            {
                _iterator = null;
            }
        }
+
+        #endregion
+    }
+}
\ No newline at end of file