You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2016/12/07 13:48:20 UTC
[02/11] lucenenet git commit: Ported enough of Sandbox to support
QueryParser.Xml
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs b/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs
new file mode 100644
index 0000000..4b830c6
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs
@@ -0,0 +1,159 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <see cref="FuzzyLikeThisQuery"/>: variant-term generation and ranking
+ /// over a small index of deliberately misspelt names.
+ /// </summary>
+ public class FuzzyLikeThisQueryTest : LuceneTestCase
+ {
+ // Shared fixtures: built in SetUp, released in TearDown.
+ private Directory directory;
+ private IndexSearcher searcher;
+ private IndexReader reader;
+ private Analyzer analyzer;
+
+ public override void SetUp()
+ {
+ base.SetUp();
+
+ analyzer = new MockAnalyzer(Random());
+ directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+
+ //Add series of docs with misspelt names
+ AddDoc(writer, "jonathon smythe", "1");
+ AddDoc(writer, "jonathan smith", "2");
+ AddDoc(writer, "johnathon smyth", "3");
+ AddDoc(writer, "johnny smith", "4");
+ AddDoc(writer, "jonny smith", "5");
+ AddDoc(writer, "johnathon smythe", "6");
+ reader = writer.Reader;
+ writer.Dispose();
+ searcher = NewSearcher(reader);
+ }
+
+ public override void TearDown()
+ {
+ // NOTE(review): analyzer is not disposed here — confirm MockAnalyzer
+ // holds no resources that require disposal.
+ reader.Dispose();
+ directory.Dispose();
+ base.TearDown();
+ }
+
+ // Adds one document with stored "name" and "id" text fields.
+ private void AddDoc(RandomIndexWriter writer, string name, string id)
+ {
+ Document doc = new Document();
+ doc.Add(NewTextField("name", name, Field.Store.YES));
+ doc.Add(NewTextField("id", id, Field.Store.YES));
+ writer.AddDocument(doc);
+ }
+
+
+ //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
+ [Test]
+ public void TestClosestEditDistanceMatchComesFirst()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("smith", "name", 0.3f, 1);
+ // Rewrite expands the fuzzy source terms into concrete index terms.
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant smythe", queryTerms.contains(new Term("name", "smythe")));
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ assertTrue("Should have variant smyth", queryTerms.contains(new Term("name", "smyth")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar not most rare variant", "2", doc.Get("id"));
+ }
+
+ //Test multiple input words are having variants produced
+ [Test]
+ public void TestMultiWord()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("jonathin smoth", "name", 0.3f, 1);
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar when using 2 words", "2", doc.Get("id"));
+ }
+
+ // LUCENE-4809
+ [Test]
+ public void TestNonExistingField()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("jonathin smoth", "name", 0.3f, 1);
+ flt.AddTerms("jonathin smoth", "this field does not exist", 0.3f, 1);
+ // don't fail here just because the field doesn't exist
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar when using 2 words", "2", doc.Get("id"));
+ }
+
+
+ //Test bug found when first query word does not match anything
+ [Test]
+ public void TestNoMatchFirstWordBug()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("fernando smith", "name", 0.3f, 1);
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar when using 2 words", "2", doc.Get("id"));
+ }
+
+ // Two queries built with identical constructor args and AddTerms calls
+ // must compare equal.
+ [Test]
+ public void TestFuzzyLikeThisQueryEquals()
+ {
+ Analyzer analyzer = new MockAnalyzer(Random());
+ FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
+ fltq1.AddTerms("javi", "subject", 0.5f, 2);
+ FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
+ fltq2.AddTerms("javi", "subject", 0.5f, 2);
+ assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
+ fltq2);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs
new file mode 100644
index 0000000..4baa0e7
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs
@@ -0,0 +1,516 @@
+\ufeffusing Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+#pragma warning disable 612, 618
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <see cref="SlowFuzzyQuery"/>
+ /// </summary>
+ public class TestSlowFuzzyQuery : LuceneTestCase
+ {
+ [Test]
+ public void TestFuzziness()
+ {
+ //every test with SlowFuzzyQuery.defaultMinSimilarity
+ //is exercising the Automaton, not the brute force linear method
+
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("aaaaa", writer);
+ addDoc("aaaab", writer);
+ addDoc("aaabb", writer);
+ addDoc("aabbb", writer);
+ addDoc("abbbb", writer);
+ addDoc("bbbbb", writer);
+ addDoc("ddddd", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ // Increasing prefix length up to 3 does not change the hit set,
+ // because the three matching terms share the "aaa" prefix.
+ SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+
+ // same with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(2, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+
+ // test scoring
+ query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals("3 documents should match", 3, hits.Length);
+ List<String> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");
+ for (int i = 0; i < hits.Length; i++)
+ {
+ string term = searcher.Doc(hits[i].Doc).Get("field");
+ //System.out.println(hits[i].score);
+ assertEquals(order[i], term);
+ }
+
+ // test pq size by supplying maxExpansions=2
+ // This query would normally return 3 documents, because 3 terms match (see above):
+ query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals("only 2 documents should match", 2, hits.Length);
+ order = Arrays.AsList("bbbbb", "abbbb");
+ for (int i = 0; i < hits.Length; i++)
+ {
+ string term = searcher.Doc(hits[i].Doc).Get("field");
+ //System.out.println(hits[i].score);
+ assertEquals(order[i], term);
+ }
+
+ // not similar enough:
+ query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // query identical to a word in the index:
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ // default allows for up to two edits:
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+
+ // query similar to a word in the index:
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(2, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+
+ // different field = no match:
+ query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ [Test]
+ public void TestFuzzinessLong2()
+ {
+ //Lucene-5033
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("abcdef", writer);
+ addDoc("segment", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ SlowFuzzyQuery query;
+
+ // minimumSimilarity > 1 is interpreted as a raw edit distance.
+ query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ [Test]
+ public void TestFuzzinessLong()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("aaaaaaa", writer);
+ addDoc("segment", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ SlowFuzzyQuery query;
+ // not similar enough:
+ query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+ // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
+ // in testDefaultFuzziness so a bigger difference is allowed:
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // no match, more than half of the characters is wrong:
+ query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // "student" and "stellent" are indeed similar to "segment" by default:
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // "student" doesn't match anymore thanks to increased minimum similarity:
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // similarity values outside [0, 1) that are not whole edit distances are invalid:
+ try
+ {
+ query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
+ fail("Expected IllegalArgumentException");
+ }
+#pragma warning disable 168
+ catch (ArgumentException e)
+#pragma warning restore 168
+ {
+ // expecting exception
+ }
+ try
+ {
+ query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
+ fail("Expected IllegalArgumentException");
+ }
+#pragma warning disable 168
+ catch (ArgumentException e)
+#pragma warning restore 168
+ {
+ // expecting exception
+ }
+
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ /**
+ * MultiTermQuery provides (via attribute) information about which values
+ * must be competitive to enter the priority queue.
+ *
+ * SlowFuzzyQuery optimizes itself around this information, if the attribute
+ * is not implemented correctly, there will be problems!
+ */
+ [Test]
+ public void TestTieBreaker()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("a123456", writer);
+ addDoc("c123456", writer);
+ addDoc("d123456", writer);
+ addDoc("e123456", writer);
+
+ Directory directory2 = NewDirectory();
+ RandomIndexWriter writer2 = new RandomIndexWriter(Random(), directory2, Similarity, TimeZone);
+ addDoc("a123456", writer2);
+ addDoc("b123456", writer2);
+ addDoc("b123456", writer2);
+ addDoc("b123456", writer2);
+ addDoc("c123456", writer2);
+ addDoc("f123456", writer2);
+
+ IndexReader ir1 = writer.Reader;
+ IndexReader ir2 = writer2.Reader;
+
+ // Searching across both sub-readers exercises the cross-segment tie-break.
+ MultiReader mr = new MultiReader(ir1, ir2);
+ IndexSearcher searcher = NewSearcher(mr);
+ SlowFuzzyQuery fq = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
+ TopDocs docs = searcher.Search(fq, 2);
+ assertEquals(5, docs.TotalHits); // 5 docs, from the a and b's
+ mr.Dispose();
+ ir1.Dispose();
+ ir2.Dispose();
+ writer.Dispose();
+ writer2.Dispose();
+ directory.Dispose();
+ directory2.Dispose();
+ }
+
+ [Test]
+ public void TestTokenLengthOpt()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("12345678911", writer);
+ addDoc("segment", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ Query query;
+ // term not over 10 chars, so optimization shortcuts
+ query = new SlowFuzzyQuery(new Term("field", "1234569"), 0.9f);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // 10 chars, so no optimization
+ query = new SlowFuzzyQuery(new Term("field", "1234567891"), 0.9f);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // over 10 chars, so no optimization
+ query = new SlowFuzzyQuery(new Term("field", "12345678911"), 0.9f);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+
+ // over 10 chars, no match
+ query = new SlowFuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ /** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
+ [Test]
+ public void TestBoostOnlyRewrite()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("Lucene", writer);
+ addDoc("Lucene", writer);
+ addDoc("Lucenne", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "lucene"));
+ query.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ // normally, 'Lucenne' would be the first result as IDF will skew the score.
+ assertEquals("Lucene", reader.Document(hits[0].Doc).Get("field"));
+ assertEquals("Lucene", reader.Document(hits[1].Doc).Get("field"));
+ assertEquals("Lucenne", reader.Document(hits[2].Doc).Get("field"));
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ [Test]
+ public void TestGiga()
+ {
+
+ Directory index = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), index, Similarity, TimeZone);
+
+ addDoc("Lucene in Action", w);
+ addDoc("Lucene for Dummies", w);
+
+ //addDoc("Giga", w);
+ addDoc("Giga byte", w);
+
+ addDoc("ManagingGigabytesManagingGigabyte", w);
+ addDoc("ManagingGigabytesManagingGigabytes", w);
+
+ addDoc("The Art of Computer Science", w);
+ addDoc("J. K. Rowling", w);
+ addDoc("JK Rowling", w);
+ addDoc("Joanne K Roling", w);
+ addDoc("Bruce Willis", w);
+ addDoc("Willis bruce", w);
+ addDoc("Brute willis", w);
+ addDoc("B. willis", w);
+ IndexReader r = w.Reader;
+ w.Dispose();
+
+ Query q = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);
+
+ // 3. search
+ IndexSearcher searcher = NewSearcher(r);
+ ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
+ r.Dispose();
+ index.Dispose();
+ }
+
+ [Test]
+ public void TestDistanceAsEditsSearching()
+ {
+ Directory index = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), index, Similarity, TimeZone);
+ addDoc("foobar", w);
+ addDoc("test", w);
+ addDoc("working", w);
+ IndexReader reader = w.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ w.Dispose();
+
+ // Whole-number similarity arguments are treated as raw edit distances.
+ SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
+ ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("foobar", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("foobar", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "t"), 3);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("test", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("test", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(2, hits.Length);
+ assertEquals("test", searcher.Doc(hits[0].Doc).Get("field"));
+ assertEquals("foobar", searcher.Doc(hits[1].Doc).Get("field"));
+
+ reader.Dispose();
+ index.Dispose();
+ }
+
+ // Adds one document with a single stored "field" text field.
+ private void addDoc(string text, RandomIndexWriter writer)
+ {
+ Document doc = new Document();
+ doc.Add(NewTextField("field", text, Field.Store.YES));
+ writer.AddDocument(doc);
+ }
+ }
+}
+#pragma warning restore 612, 618
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs
new file mode 100644
index 0000000..6a2988e
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs
@@ -0,0 +1,194 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Search.Similarities;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * Tests the results of fuzzy against pre-recorded output
+ * The format of the file is the following:
+ *
+ * Header Row: # of bits: generate 2^n sequential documents
+ * with a value of Integer.toBinaryString
+ *
+ * Entries: an entry is a param spec line, a resultCount line, and
+ * then 'resultCount' results lines. The results lines are in the
+ * expected order.
+ *
+ * param spec line: a comma-separated list of params to FuzzyQuery
+ * (query, prefixLen, pqSize, minScore)
+ * query = query text as a number (expand with Integer.toBinaryString)
+ * prefixLen = prefix length
+ * pqSize = priority queue maximum size for TopTermsBoostOnlyBooleanQueryRewrite
+ * minScore = minimum similarity
+ *
+ * resultCount line: total number of expected hits.
+ *
+ * results line: comma-separated docID, score pair
+ **/
+ public class TestSlowFuzzyQuery2 : LuceneTestCase
+ {
+ /** epsilon for score comparisons */
+ static readonly float epsilon = 0.00001f;
+
+ // Each row maps binary digit 0/1 to a pair of code points; rows mix
+ // BMP and supplementary-plane characters to exercise surrogate handling.
+ static int[][] mappings = new int[][] {
+ new int[] { 0x40, 0x41 },
+ new int[] { 0x40, 0x0195 },
+ new int[] { 0x40, 0x0906 },
+ new int[] { 0x40, 0x1040F },
+ new int[] { 0x0194, 0x0195 },
+ new int[] { 0x0194, 0x0906 },
+ new int[] { 0x0194, 0x1040F },
+ new int[] { 0x0905, 0x0906 },
+ new int[] { 0x0905, 0x1040F },
+ new int[] { 0x1040E, 0x1040F }
+ };
+
+ [Test]
+ public void TestFromTestData()
+ {
+ // TODO: randomize!
+ assertFromTestData(mappings[Random().nextInt(mappings.Length)]);
+ }
+
+ // Replays the pre-recorded query/result file against an index built
+ // from the given code-point mapping and checks docs and scores.
+ public void assertFromTestData(int[] codePointTable)
+ {
+ if (VERBOSE)
+ {
+ // NOTE(review): concatenating an int[] prints its type name, not
+ // its contents — consider string.Join for useful verbose output.
+ Console.WriteLine("TEST: codePointTable=" + codePointTable);
+ }
+ //Stream stream = GetType().getResourceAsStream("fuzzyTestData.txt");
+ Stream stream = GetType().Assembly.GetManifestResourceStream("Lucene.Net.Sandbox.Queries.fuzzyTestData.txt");
+ // NOTE(review): stream/reader are never disposed — confirm whether
+ // wrapping them in using blocks is intended here.
+ TextReader reader = new StreamReader(stream, Encoding.UTF8);
+
+ int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
+ int terms = (int)Math.Pow(2, bits);
+
+ Store.Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));
+
+ Document doc = new Document();
+ Field field = NewTextField("field", "", Field.Store.NO);
+ doc.Add(field);
+
+ // Index every value 0..2^bits-1, encoded via the code-point table.
+ for (int i = 0; i < terms; i++)
+ {
+ field.StringValue = (MapInt(codePointTable, i));
+ writer.AddDocument(doc);
+ }
+
+ IndexReader r = writer.Reader;
+ IndexSearcher searcher = NewSearcher(r);
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: searcher=" + searcher);
+ }
+ // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
+ // otherwise scores are different!
+ searcher.Similarity = (new DefaultSimilarity());
+
+ writer.Dispose();
+ String line;
+ int lineNum = 0;
+ while ((line = reader.ReadLine()) != null)
+ {
+ lineNum++;
+ String[] @params = line.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
+ String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
+ int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
+ int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
+ float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
+#pragma warning disable 612, 618
+ SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
+#pragma warning restore 612, 618
+ q.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));
+ int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
+ TopDocs docs = searcher.Search(q, expectedResults);
+ assertEquals(expectedResults, docs.TotalHits);
+ for (int i = 0; i < expectedResults; i++)
+ {
+ String[] scoreDoc = reader.ReadLine().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
+ assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
+ assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
+ }
+ }
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ /* map bits to unicode codepoints */
+ private static String MapInt(int[] codePointTable, int i)
+ {
+ StringBuilder sb = new StringBuilder();
+ String binary = Number.ToBinaryString(i);
+ for (int j = 0; j < binary.Length; j++)
+ sb.AppendCodePoint(codePointTable[binary[j] - '0']);
+ return sb.toString();
+ }
+
+ /* Code to generate test data
+ public static void main(String args[]) throws Exception {
+ int bits = 3;
+ System.out.println(bits);
+ int terms = (int) Math.pow(2, bits);
+
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(),
+ IndexWriter.MaxFieldLength.UNLIMITED);
+
+ Document doc = new Document();
+ Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED);
+ doc.add(field);
+
+ for (int i = 0; i < terms; i++) {
+ field.setValue(Integer.toBinaryString(i));
+ writer.addDocument(doc);
+ }
+
+ writer.forceMerge(1);
+ writer.close();
+
+ IndexSearcher searcher = new IndexSearcher(dir);
+ for (int prefix = 0; prefix < bits; prefix++)
+ for (int pqsize = 1; pqsize <= terms; pqsize++)
+ for (float minscore = 0.1F; minscore < 1F; minscore += 0.2F)
+ for (int query = 0; query < terms; query++) {
+ FuzzyQuery q = new FuzzyQuery(
+ new Term("field", Integer.toBinaryString(query)), minscore, prefix);
+ q.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqsize));
+ System.out.println(query + "," + prefix + "," + pqsize + "," + minscore);
+ TopDocs docs = searcher.search(q, terms);
+ System.out.println(docs.totalHits);
+ for (int i = 0; i < docs.totalHits; i++)
+ System.out.println(docs.scoreDocs[i].doc + "," + docs.scoreDocs[i].score);
+ }
+ }
+ */
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs
new file mode 100644
index 0000000..4a14356
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs
@@ -0,0 +1,235 @@
+\ufeffusing Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>Simple tests for SortedSetSortField</summary>
+ public class TestSortedSetSortField : LuceneTestCase
+ {
+ [Test]
+ public void TestForward()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestReverse()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", true));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingFirst()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_FIRST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ // null comes first
+ assertEquals("3", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingLast()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_LAST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ // null comes last
+ assertEquals("3", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestSingleton()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestEmptyIndex()
+ {
+ IndexSearcher empty = NewSearcher(new MultiReader());
+ Query query = new TermQuery(new Term("contents", "foo"));
+
+ Sort sort = new Sort();
+ sort.SetSort(new SortedSetSortField("sortedset", false));
+ TopDocs td = empty.Search(query, null, 10, sort, true, true);
+ assertEquals(0, td.TotalHits);
+
+ // for an empty index, any selector should work
+ foreach (Selector v in Enum.GetValues(typeof(Selector)))
+ {
+ sort.SetSort(new SortedSetSortField("sortedset", false, v));
+ td = empty.Search(query, null, 10, sort, true, true);
+ assertEquals(0, td.TotalHits);
+ }
+ }
+
+ [Test]
+ public void TestEquals()
+ {
+ SortField sf = new SortedSetSortField("a", false);
+ assertFalse(sf.equals(null));
+
+
+ assertEquals(sf, sf);
+
+ SortField sf2 = new SortedSetSortField("a", false);
+ assertEquals(sf, sf2);
+ assertEquals(sf.GetHashCode(), sf2.GetHashCode());
+
+
+ assertFalse(sf.equals(new SortedSetSortField("a", true)));
+ assertFalse(sf.equals(new SortedSetSortField("b", false)));
+ assertFalse(sf.equals(new SortedSetSortField("a", false, Selector.MAX)));
+ assertFalse(sf.equals("foo"));
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs
new file mode 100644
index 0000000..342f679
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs
@@ -0,0 +1,213 @@
+\ufeffusing Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>Simple tests for SortedSetSortField, indexing the sortedset up front</summary>
+ [SuppressCodecs("Lucene40", "Lucene41", "Appending", "Lucene3x")]// avoid codecs that don't support sortedset
+ public class TestSortedSetSortFieldDocValues : LuceneTestCase
+ {
+ public override void SetUp()
+ {
+ base.SetUp();
+ // ensure there is nothing in fieldcache before test starts
+ FieldCache.DEFAULT.PurgeAllCaches();
+ }
+
+ private void assertNoFieldCaches()
+ {
+ // docvalues sorting should NOT create any fieldcache entries!
+ assertEquals(0, FieldCache.DEFAULT.CacheEntries.Length);
+ }
+
+ [Test]
+ public void TestForward()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestReverse()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", true));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingFirst()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_FIRST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ // null comes first
+ assertEquals("3", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingLast()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_LAST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ // null comes last
+ assertEquals("3", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestSingleton()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+ }
+}