You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2016/12/07 13:48:20 UTC
[02/11] lucenenet git commit: Ported enough of Sandbox to support
QueryParser.Xml
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs b/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs
new file mode 100644
index 0000000..4b830c6
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/FuzzyLikeThisQueryTest.cs
@@ -0,0 +1,159 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <see cref="FuzzyLikeThisQuery"/>: variant-term generation and ranking
+ /// over a small index of deliberately misspelt names.
+ /// </summary>
+ public class FuzzyLikeThisQueryTest : LuceneTestCase
+ {
+ // Shared fixtures: built in SetUp, released in TearDown.
+ private Directory directory;
+ private IndexSearcher searcher;
+ private IndexReader reader;
+ private Analyzer analyzer;
+
+ public override void SetUp()
+ {
+ base.SetUp();
+
+ analyzer = new MockAnalyzer(Random());
+ directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+
+ //Add series of docs with misspelt names
+ AddDoc(writer, "jonathon smythe", "1");
+ AddDoc(writer, "jonathan smith", "2");
+ AddDoc(writer, "johnathon smyth", "3");
+ AddDoc(writer, "johnny smith", "4");
+ AddDoc(writer, "jonny smith", "5");
+ AddDoc(writer, "johnathon smythe", "6");
+ reader = writer.Reader;
+ writer.Dispose();
+ searcher = NewSearcher(reader);
+ }
+
+ public override void TearDown()
+ {
+ // NOTE(review): analyzer is not disposed here — confirm MockAnalyzer
+ // holds no resources that require disposal.
+ reader.Dispose();
+ directory.Dispose();
+ base.TearDown();
+ }
+
+ // Adds one document with stored "name" and "id" text fields.
+ private void AddDoc(RandomIndexWriter writer, string name, string id)
+ {
+ Document doc = new Document();
+ doc.Add(NewTextField("name", name, Field.Store.YES));
+ doc.Add(NewTextField("id", id, Field.Store.YES));
+ writer.AddDocument(doc);
+ }
+
+
+ //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
+ [Test]
+ public void TestClosestEditDistanceMatchComesFirst()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("smith", "name", 0.3f, 1);
+ // Rewrite expands the fuzzy source terms into concrete index terms.
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant smythe", queryTerms.contains(new Term("name", "smythe")));
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ assertTrue("Should have variant smyth", queryTerms.contains(new Term("name", "smyth")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar not most rare variant", "2", doc.Get("id"));
+ }
+
+ //Test multiple input words are having variants produced
+ [Test]
+ public void TestMultiWord()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("jonathin smoth", "name", 0.3f, 1);
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar when using 2 words", "2", doc.Get("id"));
+ }
+
+ // LUCENE-4809
+ [Test]
+ public void TestNonExistingField()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("jonathin smoth", "name", 0.3f, 1);
+ flt.AddTerms("jonathin smoth", "this field does not exist", 0.3f, 1);
+ // don't fail here just because the field doesn't exist
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar when using 2 words", "2", doc.Get("id"));
+ }
+
+
+ //Test bug found when first query word does not match anything
+ [Test]
+ public void TestNoMatchFirstWordBug()
+ {
+ FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+ flt.AddTerms("fernando smith", "name", 0.3f, 1);
+ Query q = flt.Rewrite(searcher.IndexReader);
+ HashSet<Term> queryTerms = new HashSet<Term>();
+ q.ExtractTerms(queryTerms);
+ assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+ TopDocs topDocs = searcher.Search(flt, 1);
+ ScoreDoc[] sd = topDocs.ScoreDocs;
+ assertTrue("score docs must match 1 doc", (sd != null) && (sd.Length > 0));
+ Document doc = searcher.Doc(sd[0].Doc);
+ assertEquals("Should match most similar when using 2 words", "2", doc.Get("id"));
+ }
+
+ // Two queries built with identical constructor args and AddTerms calls
+ // must compare equal.
+ [Test]
+ public void TestFuzzyLikeThisQueryEquals()
+ {
+ Analyzer analyzer = new MockAnalyzer(Random());
+ FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
+ fltq1.AddTerms("javi", "subject", 0.5f, 2);
+ FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
+ fltq2.AddTerms("javi", "subject", 0.5f, 2);
+ assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
+ fltq2);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs
new file mode 100644
index 0000000..4baa0e7
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery.cs
@@ -0,0 +1,516 @@
+\ufeffusing Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+
+#pragma warning disable 612, 618
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Tests <see cref="SlowFuzzyQuery"/>
+ /// </summary>
+ public class TestSlowFuzzyQuery : LuceneTestCase
+ {
+ [Test]
+ public void TestFuzziness()
+ {
+ //every test with SlowFuzzyQuery.defaultMinSimilarity
+ //is exercising the Automaton, not the brute force linear method
+
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("aaaaa", writer);
+ addDoc("aaaab", writer);
+ addDoc("aaabb", writer);
+ addDoc("aabbb", writer);
+ addDoc("abbbb", writer);
+ addDoc("bbbbb", writer);
+ addDoc("ddddd", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ // Increasing prefix length up to 3 does not change the hit set,
+ // because the three matching terms share the "aaa" prefix.
+ SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+
+ // same with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(2, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+
+ // test scoring
+ query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals("3 documents should match", 3, hits.Length);
+ List<String> order = Arrays.AsList("bbbbb", "abbbb", "aabbb");
+ for (int i = 0; i < hits.Length; i++)
+ {
+ string term = searcher.Doc(hits[i].Doc).Get("field");
+ //System.out.println(hits[i].score);
+ assertEquals(order[i], term);
+ }
+
+ // test pq size by supplying maxExpansions=2
+ // This query would normally return 3 documents, because 3 terms match (see above):
+ query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals("only 2 documents should match", 2, hits.Length);
+ order = Arrays.AsList("bbbbb", "abbbb");
+ for (int i = 0; i < hits.Length; i++)
+ {
+ string term = searcher.Doc(hits[i].Doc).Get("field");
+ //System.out.println(hits[i].score);
+ assertEquals(order[i], term);
+ }
+
+ // not similar enough:
+ query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // query identical to a word in the index:
+ query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ // default allows for up to two edits:
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+
+ // query similar to a word in the index:
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ assertEquals(searcher.Doc(hits[2].Doc).Get("field"), ("aaabb"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(2, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaa"));
+ assertEquals(searcher.Doc(hits[1].Doc).Get("field"), ("aaaab"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("ddddd"));
+ query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+
+ // different field = no match:
+ query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ [Test]
+ public void TestFuzzinessLong2()
+ {
+ //Lucene-5033
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("abcdef", writer);
+ addDoc("segment", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ SlowFuzzyQuery query;
+
+ // minimumSimilarity > 1 is interpreted as a raw edit distance.
+ query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ [Test]
+ public void TestFuzzinessLong()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("aaaaaaa", writer);
+ addDoc("segment", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ SlowFuzzyQuery query;
+ // not similar enough:
+ query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+ // edit distance to "aaaaaaa" = 3, this matches because the string is longer than
+ // in testDefaultFuzziness so a bigger difference is allowed:
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals(searcher.Doc(hits[0].Doc).Get("field"), ("aaaaaaa"));
+ query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // no match, more than half of the characters is wrong:
+ query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // "student" and "stellent" are indeed similar to "segment" by default:
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+
+ // now with prefix
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+ query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // "student" doesn't match anymore thanks to increased minimum similarity:
+ query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // similarity values outside [0, 1) that are not whole edit distances are invalid:
+ try
+ {
+ query = new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
+ fail("Expected IllegalArgumentException");
+ }
+#pragma warning disable 168
+ catch (ArgumentException e)
+#pragma warning restore 168
+ {
+ // expecting exception
+ }
+ try
+ {
+ query = new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
+ fail("Expected IllegalArgumentException");
+ }
+#pragma warning disable 168
+ catch (ArgumentException e)
+#pragma warning restore 168
+ {
+ // expecting exception
+ }
+
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ /**
+ * MultiTermQuery provides (via attribute) information about which values
+ * must be competitive to enter the priority queue.
+ *
+ * SlowFuzzyQuery optimizes itself around this information, if the attribute
+ * is not implemented correctly, there will be problems!
+ */
+ [Test]
+ public void TestTieBreaker()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("a123456", writer);
+ addDoc("c123456", writer);
+ addDoc("d123456", writer);
+ addDoc("e123456", writer);
+
+ Directory directory2 = NewDirectory();
+ RandomIndexWriter writer2 = new RandomIndexWriter(Random(), directory2, Similarity, TimeZone);
+ addDoc("a123456", writer2);
+ addDoc("b123456", writer2);
+ addDoc("b123456", writer2);
+ addDoc("b123456", writer2);
+ addDoc("c123456", writer2);
+ addDoc("f123456", writer2);
+
+ IndexReader ir1 = writer.Reader;
+ IndexReader ir2 = writer2.Reader;
+
+ // Searching across both sub-readers exercises the cross-segment tie-break.
+ MultiReader mr = new MultiReader(ir1, ir2);
+ IndexSearcher searcher = NewSearcher(mr);
+ SlowFuzzyQuery fq = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
+ TopDocs docs = searcher.Search(fq, 2);
+ assertEquals(5, docs.TotalHits); // 5 docs, from the a and b's
+ mr.Dispose();
+ ir1.Dispose();
+ ir2.Dispose();
+ writer.Dispose();
+ writer2.Dispose();
+ directory.Dispose();
+ directory2.Dispose();
+ }
+
+ [Test]
+ public void TestTokenLengthOpt()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("12345678911", writer);
+ addDoc("segment", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ Query query;
+ // term not over 10 chars, so optimization shortcuts
+ query = new SlowFuzzyQuery(new Term("field", "1234569"), 0.9f);
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // 10 chars, so no optimization
+ query = new SlowFuzzyQuery(new Term("field", "1234567891"), 0.9f);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ // over 10 chars, so no optimization
+ query = new SlowFuzzyQuery(new Term("field", "12345678911"), 0.9f);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(1, hits.Length);
+
+ // over 10 chars, no match
+ query = new SlowFuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
+ hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(0, hits.Length);
+
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ /** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
+ [Test]
+ public void TestBoostOnlyRewrite()
+ {
+ Directory directory = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+ addDoc("Lucene", writer);
+ addDoc("Lucene", writer);
+ addDoc("Lucenne", writer);
+
+ IndexReader reader = writer.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ writer.Dispose();
+
+ SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "lucene"));
+ query.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
+ ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
+ assertEquals(3, hits.Length);
+ // normally, 'Lucenne' would be the first result as IDF will skew the score.
+ assertEquals("Lucene", reader.Document(hits[0].Doc).Get("field"));
+ assertEquals("Lucene", reader.Document(hits[1].Doc).Get("field"));
+ assertEquals("Lucenne", reader.Document(hits[2].Doc).Get("field"));
+ reader.Dispose();
+ directory.Dispose();
+ }
+
+ [Test]
+ public void TestGiga()
+ {
+
+ Directory index = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), index, Similarity, TimeZone);
+
+ addDoc("Lucene in Action", w);
+ addDoc("Lucene for Dummies", w);
+
+ //addDoc("Giga", w);
+ addDoc("Giga byte", w);
+
+ addDoc("ManagingGigabytesManagingGigabyte", w);
+ addDoc("ManagingGigabytesManagingGigabytes", w);
+
+ addDoc("The Art of Computer Science", w);
+ addDoc("J. K. Rowling", w);
+ addDoc("JK Rowling", w);
+ addDoc("Joanne K Roling", w);
+ addDoc("Bruce Willis", w);
+ addDoc("Willis bruce", w);
+ addDoc("Brute willis", w);
+ addDoc("B. willis", w);
+ IndexReader r = w.Reader;
+ w.Dispose();
+
+ Query q = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);
+
+ // 3. search
+ IndexSearcher searcher = NewSearcher(r);
+ ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
+ r.Dispose();
+ index.Dispose();
+ }
+
+ [Test]
+ public void TestDistanceAsEditsSearching()
+ {
+ Directory index = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), index, Similarity, TimeZone);
+ addDoc("foobar", w);
+ addDoc("test", w);
+ addDoc("working", w);
+ IndexReader reader = w.Reader;
+ IndexSearcher searcher = NewSearcher(reader);
+ w.Dispose();
+
+ // Whole-number similarity arguments are treated as raw edit distances.
+ SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
+ ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("foobar", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("foobar", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "t"), 3);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("test", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(1, hits.Length);
+ assertEquals("test", searcher.Doc(hits[0].Doc).Get("field"));
+
+ q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
+ hits = searcher.Search(q, 10).ScoreDocs;
+ assertEquals(2, hits.Length);
+ assertEquals("test", searcher.Doc(hits[0].Doc).Get("field"));
+ assertEquals("foobar", searcher.Doc(hits[1].Doc).Get("field"));
+
+ reader.Dispose();
+ index.Dispose();
+ }
+
+ // Adds one document with a single stored "field" text field.
+ private void addDoc(string text, RandomIndexWriter writer)
+ {
+ Document doc = new Document();
+ doc.Add(NewTextField("field", text, Field.Store.YES));
+ writer.AddDocument(doc);
+ }
+ }
+}
+#pragma warning restore 612, 618
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs
new file mode 100644
index 0000000..6a2988e
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSlowFuzzyQuery2.cs
@@ -0,0 +1,194 @@
+\ufeffusing Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Search.Similarities;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /**
+ * Tests the results of fuzzy against pre-recorded output
+ * The format of the file is the following:
+ *
+ * Header Row: # of bits: generate 2^n sequential documents
+ * with a value of Integer.toBinaryString
+ *
+ * Entries: an entry is a param spec line, a resultCount line, and
+ * then 'resultCount' results lines. The results lines are in the
+ * expected order.
+ *
+ * param spec line: a comma-separated list of params to FuzzyQuery
+ * (query, prefixLen, pqSize, minScore)
+ * query = query text as a number (expand with Integer.toBinaryString)
+ * prefixLen = prefix length
+ * pqSize = priority queue maximum size for TopTermsBoostOnlyBooleanQueryRewrite
+ * minScore = minimum similarity
+ *
+ * resultCount line: total number of expected hits.
+ *
+ * results line: comma-separated docID, score pair
+ **/
+ public class TestSlowFuzzyQuery2 : LuceneTestCase
+ {
+ /** epsilon for score comparisons */
+ static readonly float epsilon = 0.00001f;
+
+ // Each row maps binary digit 0/1 to a pair of code points; rows mix
+ // BMP and supplementary-plane characters to exercise surrogate handling.
+ static int[][] mappings = new int[][] {
+ new int[] { 0x40, 0x41 },
+ new int[] { 0x40, 0x0195 },
+ new int[] { 0x40, 0x0906 },
+ new int[] { 0x40, 0x1040F },
+ new int[] { 0x0194, 0x0195 },
+ new int[] { 0x0194, 0x0906 },
+ new int[] { 0x0194, 0x1040F },
+ new int[] { 0x0905, 0x0906 },
+ new int[] { 0x0905, 0x1040F },
+ new int[] { 0x1040E, 0x1040F }
+ };
+
+ [Test]
+ public void TestFromTestData()
+ {
+ // TODO: randomize!
+ assertFromTestData(mappings[Random().nextInt(mappings.Length)]);
+ }
+
+ // Replays the pre-recorded query/result file against an index built
+ // from the given code-point mapping and checks docs and scores.
+ public void assertFromTestData(int[] codePointTable)
+ {
+ if (VERBOSE)
+ {
+ // NOTE(review): concatenating an int[] prints its type name, not
+ // its contents — consider string.Join for useful verbose output.
+ Console.WriteLine("TEST: codePointTable=" + codePointTable);
+ }
+ //Stream stream = GetType().getResourceAsStream("fuzzyTestData.txt");
+ Stream stream = GetType().Assembly.GetManifestResourceStream("Lucene.Net.Sandbox.Queries.fuzzyTestData.txt");
+ // NOTE(review): stream/reader are never disposed — confirm whether
+ // wrapping them in using blocks is intended here.
+ TextReader reader = new StreamReader(stream, Encoding.UTF8);
+
+ int bits = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
+ int terms = (int)Math.Pow(2, bits);
+
+ Store.Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMergePolicy(NewLogMergePolicy()));
+
+ Document doc = new Document();
+ Field field = NewTextField("field", "", Field.Store.NO);
+ doc.Add(field);
+
+ // Index every value 0..2^bits-1, encoded via the code-point table.
+ for (int i = 0; i < terms; i++)
+ {
+ field.StringValue = (MapInt(codePointTable, i));
+ writer.AddDocument(doc);
+ }
+
+ IndexReader r = writer.Reader;
+ IndexSearcher searcher = NewSearcher(r);
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: searcher=" + searcher);
+ }
+ // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
+ // otherwise scores are different!
+ searcher.Similarity = (new DefaultSimilarity());
+
+ writer.Dispose();
+ String line;
+ int lineNum = 0;
+ while ((line = reader.ReadLine()) != null)
+ {
+ lineNum++;
+ String[] @params = line.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
+ String query = MapInt(codePointTable, int.Parse(@params[0], CultureInfo.InvariantCulture));
+ int prefix = int.Parse(@params[1], CultureInfo.InvariantCulture);
+ int pqSize = int.Parse(@params[2], CultureInfo.InvariantCulture);
+ float minScore = float.Parse(@params[3], CultureInfo.InvariantCulture);
+#pragma warning disable 612, 618
+ SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", query), minScore, prefix);
+#pragma warning restore 612, 618
+ q.SetRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqSize));
+ int expectedResults = int.Parse(reader.ReadLine(), CultureInfo.InvariantCulture);
+ TopDocs docs = searcher.Search(q, expectedResults);
+ assertEquals(expectedResults, docs.TotalHits);
+ for (int i = 0; i < expectedResults; i++)
+ {
+ String[] scoreDoc = reader.ReadLine().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
+ assertEquals(int.Parse(scoreDoc[0], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Doc);
+ assertEquals(float.Parse(scoreDoc[1], CultureInfo.InvariantCulture), docs.ScoreDocs[i].Score, epsilon);
+ }
+ }
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ /* map bits to unicode codepoints */
+ private static String MapInt(int[] codePointTable, int i)
+ {
+ StringBuilder sb = new StringBuilder();
+ String binary = Number.ToBinaryString(i);
+ for (int j = 0; j < binary.Length; j++)
+ sb.AppendCodePoint(codePointTable[binary[j] - '0']);
+ return sb.toString();
+ }
+
+ /* Code to generate test data
+ public static void main(String args[]) throws Exception {
+ int bits = 3;
+ System.out.println(bits);
+ int terms = (int) Math.pow(2, bits);
+
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(),
+ IndexWriter.MaxFieldLength.UNLIMITED);
+
+ Document doc = new Document();
+ Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED);
+ doc.add(field);
+
+ for (int i = 0; i < terms; i++) {
+ field.setValue(Integer.toBinaryString(i));
+ writer.addDocument(doc);
+ }
+
+ writer.forceMerge(1);
+ writer.close();
+
+ IndexSearcher searcher = new IndexSearcher(dir);
+ for (int prefix = 0; prefix < bits; prefix++)
+ for (int pqsize = 1; pqsize <= terms; pqsize++)
+ for (float minscore = 0.1F; minscore < 1F; minscore += 0.2F)
+ for (int query = 0; query < terms; query++) {
+ FuzzyQuery q = new FuzzyQuery(
+ new Term("field", Integer.toBinaryString(query)), minscore, prefix);
+ q.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(pqsize));
+ System.out.println(query + "," + prefix + "," + pqsize + "," + minscore);
+ TopDocs docs = searcher.search(q, terms);
+ System.out.println(docs.totalHits);
+ for (int i = 0; i < docs.totalHits; i++)
+ System.out.println(docs.scoreDocs[i].doc + "," + docs.scoreDocs[i].score);
+ }
+ }
+ */
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs
new file mode 100644
index 0000000..4a14356
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortField.cs
@@ -0,0 +1,235 @@
+\ufeffusing Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>Simple tests for SortedSetSortField</summary>
+ public class TestSortedSetSortField : LuceneTestCase
+ {
+ [Test]
+ public void TestForward()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestReverse()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", true));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingFirst()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_FIRST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ // null comes first
+ assertEquals("3", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingLast()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "foo", Field.Store.NO));
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_LAST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ // null comes last
+ assertEquals("3", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestSingleton()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(NewStringField("value", "baz", Field.Store.NO));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("value", "bar", Field.Store.NO));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestEmptyIndex()
+ {
+ IndexSearcher empty = NewSearcher(new MultiReader());
+ Query query = new TermQuery(new Term("contents", "foo"));
+
+ Sort sort = new Sort();
+ sort.SetSort(new SortedSetSortField("sortedset", false));
+ TopDocs td = empty.Search(query, null, 10, sort, true, true);
+ assertEquals(0, td.TotalHits);
+
+ // for an empty index, any selector should work
+ foreach (Selector v in Enum.GetValues(typeof(Selector)))
+ {
+ sort.SetSort(new SortedSetSortField("sortedset", false, v));
+ td = empty.Search(query, null, 10, sort, true, true);
+ assertEquals(0, td.TotalHits);
+ }
+ }
+
+ [Test]
+ public void TestEquals()
+ {
+ SortField sf = new SortedSetSortField("a", false);
+ assertFalse(sf.equals(null));
+
+
+ assertEquals(sf, sf);
+
+ SortField sf2 = new SortedSetSortField("a", false);
+ assertEquals(sf, sf2);
+ assertEquals(sf.GetHashCode(), sf2.GetHashCode());
+
+
+ assertFalse(sf.equals(new SortedSetSortField("a", true)));
+ assertFalse(sf.equals(new SortedSetSortField("b", false)));
+ assertFalse(sf.equals(new SortedSetSortField("a", false, Selector.MAX)));
+ assertFalse(sf.equals("foo"));
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3395a8b4/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs
----------------------------------------------------------------------
diff --git a/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs
new file mode 100644
index 0000000..342f679
--- /dev/null
+++ b/Lucene.Net.Tests.Sandbox/Queries/TestSortedSetSortFieldDocValues.cs
@@ -0,0 +1,213 @@
+\ufeffusing Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Sandbox.Queries
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>Simple tests for SortedSetSortField, indexing the sortedset up front</summary>
+ [SuppressCodecs("Lucene40", "Lucene41", "Appending", "Lucene3x")]// avoid codecs that don't support sortedset
+ public class TestSortedSetSortFieldDocValues : LuceneTestCase
+ {
+ public override void SetUp()
+ {
+ base.SetUp();
+ // ensure there is nothing in fieldcache before test starts
+ FieldCache.DEFAULT.PurgeAllCaches();
+ }
+
+ private void assertNoFieldCaches()
+ {
+ // docvalues sorting should NOT create any fieldcache entries!
+ assertEquals(0, FieldCache.DEFAULT.CacheEntries.Length);
+ }
+
+ [Test]
+ public void TestForward()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestReverse()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", true));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("2", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingFirst()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_FIRST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ // null comes first
+ assertEquals("3", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("1", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestMissingLast()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("foo")));
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(NewStringField("id", "3", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ SortField sortField = new SortedSetSortField("value", false);
+ sortField.MissingValue = (SortField.STRING_LAST);
+ Sort sort = new Sort(sortField);
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(3, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ // null comes last
+ assertEquals("3", searcher.Doc(td.ScoreDocs[2].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public void TestSingleton()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("baz")));
+ doc.Add(NewStringField("id", "2", Field.Store.YES));
+ writer.AddDocument(doc);
+ doc = new Document();
+ doc.Add(new SortedSetDocValuesField("value", new BytesRef("bar")));
+ doc.Add(NewStringField("id", "1", Field.Store.YES));
+ writer.AddDocument(doc);
+ IndexReader ir = writer.Reader;
+ writer.Dispose();
+
+ IndexSearcher searcher = NewSearcher(ir);
+ Sort sort = new Sort(new SortedSetSortField("value", false));
+
+ TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
+ assertEquals(2, td.TotalHits);
+ // 'bar' comes before 'baz'
+ assertEquals("1", searcher.Doc(td.ScoreDocs[0].Doc).Get("id"));
+ assertEquals("2", searcher.Doc(td.ScoreDocs[1].Doc).Get("id"));
+ assertNoFieldCaches();
+
+ ir.Dispose();
+ dir.Dispose();
+ }
+ }
+}