Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/26 23:37:08 UTC
[20/72] [abbrv] [partial] lucenenet git commit: Lucene.Net.Tests: Removed \core directory and put its contents in root directory
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs b/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs
new file mode 100644
index 0000000..706987e
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs
@@ -0,0 +1,580 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Lucene.Net.Documents;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Index
+{
+ using Lucene.Net.Randomized.Generators;
+ using NUnit.Framework;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Analyzer = Lucene.Net.Analysis.Analyzer;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using CannedTokenStream = Lucene.Net.Analysis.CannedTokenStream;
+ using Directory = Lucene.Net.Store.Directory;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Document = Documents.Document;
+ using English = Lucene.Net.Util.English;
+ using Field = Field;
+ using FieldType = FieldType;
+ using Int32Field = Int32Field;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using MockPayloadAnalyzer = Lucene.Net.Analysis.MockPayloadAnalyzer;
+ using StringField = StringField;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+ using TextField = TextField;
+ using Token = Lucene.Net.Analysis.Token;
+ using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+ // TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs.
+ // not all codecs store prx separately...
+ // TODO: fix sep codec to index offsets so we can greatly reduce this list!
+ [SuppressCodecs("Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom")]
+ [TestFixture]
+ public class TestPostingsOffsets : LuceneTestCase
+ {
+ internal IndexWriterConfig Iwc;
+
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+ }
+
+ [Test]
+ public virtual void TestBasic()
+ {
+ Directory dir = NewDirectory();
+
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
+ Document doc = new Document();
+
+ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+ ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ if (Random().NextBoolean())
+ {
+ ft.StoreTermVectors = true;
+ ft.StoreTermVectorPositions = Random().NextBoolean();
+ ft.StoreTermVectorOffsets = Random().NextBoolean();
+ }
+ Token[] tokens = new Token[] { MakeToken("a", 1, 0, 6), MakeToken("b", 1, 8, 9), MakeToken("a", 1, 9, 17), MakeToken("c", 1, 19, 50) };
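+ // every token has posIncr=1, so positions are 0..3; "a" occurs at positions 0
+ // and 2 with offsets 0-6 and 9-17, which the per-term assertions below verify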
+ doc.Add(new Field("content", new CannedTokenStream(tokens), ft));
+
+ w.AddDocument(doc);
+ IndexReader r = w.Reader;
+ w.Dispose();
+
+ DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("a"));
+ Assert.IsNotNull(dp);
+ Assert.AreEqual(0, dp.NextDoc());
+ Assert.AreEqual(2, dp.Freq);
+ Assert.AreEqual(0, dp.NextPosition());
+ Assert.AreEqual(0, dp.StartOffset);
+ Assert.AreEqual(6, dp.EndOffset);
+ Assert.AreEqual(2, dp.NextPosition());
+ Assert.AreEqual(9, dp.StartOffset);
+ Assert.AreEqual(17, dp.EndOffset);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
+
+ dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("b"));
+ Assert.IsNotNull(dp);
+ Assert.AreEqual(0, dp.NextDoc());
+ Assert.AreEqual(1, dp.Freq);
+ Assert.AreEqual(1, dp.NextPosition());
+ Assert.AreEqual(8, dp.StartOffset);
+ Assert.AreEqual(9, dp.EndOffset);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
+
+ dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("c"));
+ Assert.IsNotNull(dp);
+ Assert.AreEqual(0, dp.NextDoc());
+ Assert.AreEqual(1, dp.Freq);
+ Assert.AreEqual(3, dp.NextPosition());
+ Assert.AreEqual(19, dp.StartOffset);
+ Assert.AreEqual(50, dp.EndOffset);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
+
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestSkipping()
+ {
+ DoTestNumbers(false);
+ }
+
+ [Test]
+ public virtual void TestPayloads()
+ {
+ DoTestNumbers(true);
+ }
+
+ public virtual void DoTestNumbers(bool withPayloads)
+ {
+ Directory dir = NewDirectory();
+ Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random());
+ Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
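+ // (LogMergePolicy merges adjacent segments only, keeping docIDs in insertion order)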
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
+
+ FieldType ft = new FieldType(TextField.TYPE_STORED);
+ ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ if (Random().NextBoolean())
+ {
+ ft.StoreTermVectors = true;
+ ft.StoreTermVectorOffsets = Random().NextBoolean();
+ ft.StoreTermVectorPositions = Random().NextBoolean();
+ }
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ doc.Add(new Field("numbers", English.IntToEnglish(i), ft));
+ doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
+ doc.Add(new StringField("id", "" + i, Field.Store.NO));
+ w.AddDocument(doc);
+ }
+
+ IndexReader reader = w.Reader;
+ w.Dispose();
+
+ string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };
+
+ foreach (string term in terms)
+ {
+ DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
+ int doc;
+ while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ string storedNumbers = reader.Document(doc).Get("numbers");
+ int freq = dp.Freq;
+ for (int i = 0; i < freq; i++)
+ {
+ dp.NextPosition();
+ int start = dp.StartOffset;
+ Debug.Assert(start >= 0);
+ int end = dp.EndOffset;
+ Debug.Assert(end >= 0 && end >= start);
+ // check that the offsets correspond to the term in the src text
+ Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term));
+ if (withPayloads)
+ {
+ // check that we have a payload and it starts with "pos"
+ Assert.IsNotNull(dp.GetPayload());
+ BytesRef payload = dp.GetPayload();
+ Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:"));
+ } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
+ }
+ }
+ }
+
+ // check we can skip correctly
+ int numSkippingTests = AtLeast(50);
+
+ for (int j = 0; j < numSkippingTests; j++)
+ {
+ int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999));
+ DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
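+ // every doc in [100, 999] spells a number containing "hundred", so the
+ // advance must land exactly on the requested docID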
+ int doc = dp.Advance(num);
+ Assert.AreEqual(num, doc);
+ int freq = dp.Freq;
+ for (int i = 0; i < freq; i++)
+ {
+ string storedNumbers = reader.Document(doc).Get("numbers");
+ dp.NextPosition();
+ int start = dp.StartOffset;
+ Debug.Assert(start >= 0);
+ int end = dp.EndOffset;
+ Debug.Assert(end >= 0 && end >= start);
+ // check that the offsets correspond to the term in the src text
+ Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred"));
+ if (withPayloads)
+ {
+ // check that we have a payload and it starts with "pos"
+ Assert.IsNotNull(dp.GetPayload());
+ BytesRef payload = dp.GetPayload();
+ Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:"));
+ } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
+ }
+ }
+
+ // check that other fields (without offsets) work correctly
+
+ for (int i = 0; i < numDocs; i++)
+ {
+ DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
+ Assert.AreEqual(i, dp.NextDoc());
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
+ }
+
+ reader.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestRandom()
+ {
+ // token -> docID -> tokens
+ IDictionary<string, IDictionary<int?, IList<Token>>> actualTokens = new Dictionary<string, IDictionary<int?, IList<Token>>>();
+
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
+
+ int numDocs = AtLeast(20);
+ //final int numDocs = AtLeast(5);
+
+ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+
+ // TODO: randomize what IndexOptions we use; also test
+ // changing this up in one IW buffered segment...:
+ ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ if (Random().NextBoolean())
+ {
+ ft.StoreTermVectors = true;
+ ft.StoreTermVectorOffsets = Random().NextBoolean();
+ ft.StoreTermVectorPositions = Random().NextBoolean();
+ }
+
+ for (int docCount = 0; docCount < numDocs; docCount++)
+ {
+ Document doc = new Document();
+ doc.Add(new Int32Field("id", docCount, Field.Store.NO));
+ IList<Token> tokens = new List<Token>();
+ int numTokens = AtLeast(100);
+ //final int numTokens = AtLeast(20);
+ int pos = -1;
+ int offset = 0;
+ //System.out.println("doc id=" + docCount);
+ for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
+ {
+ string text;
+ if (Random().NextBoolean())
+ {
+ text = "a";
+ }
+ else if (Random().NextBoolean())
+ {
+ text = "b";
+ }
+ else if (Random().NextBoolean())
+ {
+ text = "c";
+ }
+ else
+ {
+ text = "d";
+ }
+
+ int posIncr = Random().NextBoolean() ? 1 : Random().Next(5);
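+ // the first token must advance the position: pos starts at -1, and a first
+ // token with posIncr=0 would leave it at an illegal -1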
+ if (tokenCount == 0 && posIncr == 0)
+ {
+ posIncr = 1;
+ }
+ int offIncr = Random().NextBoolean() ? 0 : Random().Next(5);
+ int tokenOffset = Random().Next(5);
+
+ Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
+ if (!actualTokens.ContainsKey(text))
+ {
+ actualTokens[text] = new Dictionary<int?, IList<Token>>();
+ }
+ IDictionary<int?, IList<Token>> postingsByDoc = actualTokens[text];
+ if (!postingsByDoc.ContainsKey(docCount))
+ {
+ postingsByDoc[docCount] = new List<Token>();
+ }
+ postingsByDoc[docCount].Add(token);
+ tokens.Add(token);
+ pos += posIncr;
+ // stuff abs position into type:
+ token.Type = "" + pos;
+ offset += offIncr + tokenOffset;
+ //System.out.println(" " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset + "/" + token.EndOffset + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
+ }
+ doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
+ w.AddDocument(doc);
+ }
+ DirectoryReader r = w.Reader;
+ w.Dispose();
+
+ string[] terms = new string[] { "a", "b", "c", "d" };
+ foreach (AtomicReaderContext ctx in r.Leaves)
+ {
+ // TODO: improve this
+ AtomicReader sub = (AtomicReader)ctx.Reader;
+ //System.out.println("\nsub=" + sub);
+ TermsEnum termsEnum = sub.Fields.GetTerms("content").GetIterator(null);
+ DocsEnum docs = null;
+ DocsAndPositionsEnum docsAndPositions = null;
+ DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
+ FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
+ foreach (string term in terms)
+ {
+ //System.out.println(" term=" + term);
+ if (termsEnum.SeekExact(new BytesRef(term)))
+ {
+ docs = termsEnum.Docs(null, docs);
+ Assert.IsNotNull(docs);
+ int doc;
+ //System.out.println(" doc/freq");
+ while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
+ //System.out.println(" doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
+ Assert.IsNotNull(expected);
+ Assert.AreEqual(expected.Count, docs.Freq);
+ }
+
+ // explicitly exclude offsets here
+ docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsEnum.FLAG_PAYLOADS);
+ Assert.IsNotNull(docsAndPositions);
+ //System.out.println(" doc/freq/pos");
+ while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
+ //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
+ Assert.IsNotNull(expected);
+ Assert.AreEqual(expected.Count, docsAndPositions.Freq);
+ foreach (Token token in expected)
+ {
+ int pos = Convert.ToInt32(token.Type);
+ //System.out.println(" pos=" + pos);
+ Assert.AreEqual(pos, docsAndPositions.NextPosition());
+ }
+ }
+
+ docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
+ Assert.IsNotNull(docsAndPositionsAndOffsets);
+ //System.out.println(" doc/freq/pos/offs");
+ while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
+ //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
+ Assert.IsNotNull(expected);
+ Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq);
+ foreach (Token token in expected)
+ {
+ int pos = Convert.ToInt32(token.Type);
+ //System.out.println(" pos=" + pos);
+ Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
+ Assert.AreEqual(token.StartOffset, docsAndPositionsAndOffsets.StartOffset);
+ Assert.AreEqual(token.EndOffset, docsAndPositionsAndOffsets.EndOffset);
+ }
+ }
+ }
+ }
+ // TODO: test advance:
+ }
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestWithUnindexedFields()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Iwc);
+ for (int i = 0; i < 100; i++)
+ {
+ Document doc = new Document();
+ // ensure at least one doc is indexed with offsets
+ if (i < 99 && Random().Next(2) == 0)
+ {
+ // stored only
+ FieldType ft = new FieldType();
+ ft.IsIndexed = false;
+ ft.IsStored = true;
+ doc.Add(new Field("foo", "boo!", ft));
+ }
+ else
+ {
+ FieldType ft = new FieldType(TextField.TYPE_STORED);
+ ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ if (Random().NextBoolean())
+ {
+ // store some term vectors for the checkindex cross-check
+ ft.StoreTermVectors = true;
+ ft.StoreTermVectorPositions = true;
+ ft.StoreTermVectorOffsets = true;
+ }
+ doc.Add(new Field("foo", "bar", ft));
+ }
+ riw.AddDocument(doc);
+ }
+ CompositeReader ir = riw.Reader;
+ AtomicReader slow = SlowCompositeReaderWrapper.Wrap(ir);
+ FieldInfos fis = slow.FieldInfos;
+ Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, fis.FieldInfo("foo").IndexOptions);
+ slow.Dispose();
+ ir.Dispose();
+ riw.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestAddFieldTwice()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ FieldType customType3 = new FieldType(TextField.TYPE_STORED);
+ customType3.StoreTermVectors = true;
+ customType3.StoreTermVectorPositions = true;
+ customType3.StoreTermVectorOffsets = true;
+ customType3.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ doc.Add(new Field("content3", "here is more content with aaa aaa aaa", customType3));
+ doc.Add(new Field("content3", "here is more content with aaa aaa aaa", customType3));
+ iw.AddDocument(doc);
+ iw.Dispose();
+ dir.Dispose(); // checkindex
+ }
+
+ // NOTE: the next two tests aren't that good as we need an EvilToken...
+ [Test]
+ public virtual void TestNegativeOffsets()
+ {
+ try
+ {
+ CheckTokens(new Token[] { MakeToken("foo", 1, -1, -1) });
+ Assert.Fail();
+ }
+#pragma warning disable 168
+ catch (System.ArgumentException expected)
+#pragma warning restore 168
+ {
+ //expected
+ }
+ }
+
+ [Test]
+ public virtual void TestIllegalOffsets()
+ {
+ try
+ {
+ CheckTokens(new Token[] { MakeToken("foo", 1, 1, 0) });
+ Assert.Fail();
+ }
+#pragma warning disable 168
+ catch (System.ArgumentException expected)
+#pragma warning restore 168
+ {
+ //expected
+ }
+ }
+
+ [Test]
+ public virtual void TestBackwardsOffsets()
+ {
+ try
+ {
+ CheckTokens(new Token[] { MakeToken("foo", 1, 0, 3), MakeToken("foo", 1, 4, 7), MakeToken("foo", 0, 3, 6) });
+ Assert.Fail();
+ }
+#pragma warning disable 168
+ catch (System.ArgumentException expected)
+#pragma warning restore 168
+ {
+ // expected
+ }
+ }
+
+ [Test]
+ public virtual void TestStackedTokens()
+ {
+ CheckTokens(new Token[] { MakeToken("foo", 1, 0, 3), MakeToken("foo", 0, 0, 3), MakeToken("foo", 0, 0, 3) });
+ }
+
+ [Test]
+ public virtual void TestLegalbutVeryLargeOffsets()
+ {
+ Directory dir = NewDirectory();
+ IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
+ Document doc = new Document();
+ Token t1 = new Token("foo", 0, int.MaxValue - 500);
+ if (Random().NextBoolean())
+ {
+ t1.Payload = new BytesRef("test");
+ }
+ Token t2 = new Token("foo", int.MaxValue - 500, int.MaxValue);
+ TokenStream tokenStream = new CannedTokenStream(new Token[] { t1, t2 });
+ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+ ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ // store some term vectors for the checkindex cross-check
+ ft.StoreTermVectors = true;
+ ft.StoreTermVectorPositions = true;
+ ft.StoreTermVectorOffsets = true;
+ Field field = new Field("foo", tokenStream, ft);
+ doc.Add(field);
+ iw.AddDocument(doc);
+ iw.Dispose();
+ dir.Dispose();
+ }
+
+ // TODO: more tests with other possibilities
+
+ private void CheckTokens(Token[] tokens)
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Iwc);
+ bool success = false;
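+ // the success flag selects the close path below: on failure, close while
+ // suppressing exceptions so the original failure is not masked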
+ try
+ {
+ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+ ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ // store some term vectors for the checkindex cross-check
+ ft.StoreTermVectors = true;
+ ft.StoreTermVectorPositions = true;
+ ft.StoreTermVectorOffsets = true;
+
+ Document doc = new Document();
+ doc.Add(new Field("body", new CannedTokenStream(tokens), ft));
+ riw.AddDocument(doc);
+ success = true;
+ }
+ finally
+ {
+ if (success)
+ {
+ IOUtils.Close(riw, dir);
+ }
+ else
+ {
+ IOUtils.CloseWhileHandlingException(riw, dir);
+ }
+ }
+ }
+
+ private Token MakeToken(string text, int posIncr, int startOffset, int endOffset)
+ {
+ Token t = new Token();
+ t.Append(text);
+ t.PositionIncrement = posIncr;
+ t.SetOffset(startOffset, endOffset);
+ return t;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs
new file mode 100644
index 0000000..031d5c0
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs
@@ -0,0 +1,142 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+ using Lucene.Net.Util;
+ using NUnit.Framework;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ //using MergedIterator = Lucene.Net.Util.MergedIterator;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ [TestFixture]
+ public class TestPrefixCodedTerms : LuceneTestCase
+ {
+ [Test]
+ public virtual void TestEmpty()
+ {
+ PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+ PrefixCodedTerms pb = b.Finish();
+ Assert.IsFalse(pb.GetEnumerator().MoveNext());
+ }
+
+ [Test]
+ public virtual void TestOne()
+ {
+ Term term = new Term("foo", "bogus");
+ PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+ b.Add(term);
+ PrefixCodedTerms pb = b.Finish();
+ IEnumerator<Term> iterator = pb.GetEnumerator();
+ Assert.IsTrue(iterator.MoveNext());
+ Assert.AreEqual(term, iterator.Current);
+ }
+
+ [Test]
+ public virtual void TestRandom()
+ {
+ SortedSet<Term> terms = new SortedSet<Term>();
+ int nterms = AtLeast(10000);
+ for (int i = 0; i < nterms; i++)
+ {
+ Term term = new Term(TestUtil.RandomUnicodeString(Random(), 2), TestUtil.RandomUnicodeString(Random()));
+ terms.Add(term);
+ }
+
+ PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+ foreach (Term @ref in terms)
+ {
+ b.Add(@ref);
+ }
+ PrefixCodedTerms pb = b.Finish();
+
+ IEnumerator<Term> expected = terms.GetEnumerator();
+ foreach (Term t in pb)
+ {
+ Assert.IsTrue(expected.MoveNext());
+ Assert.AreEqual(expected.Current, t);
+ }
+ Assert.IsFalse(expected.MoveNext());
+ }
+
+ [Test]
+ public virtual void TestMergeOne()
+ {
+ Term t1 = new Term("foo", "a");
+ PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder();
+ b1.Add(t1);
+ PrefixCodedTerms pb1 = b1.Finish();
+
+ Term t2 = new Term("foo", "b");
+ PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder();
+ b2.Add(t2);
+ PrefixCodedTerms pb2 = b2.Finish();
+
+ IEnumerator<Term> merged = new MergedIterator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator());
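+ // MergedIterator interleaves the sorted sub-enumerators into one sorted sequence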
+ Assert.IsTrue(merged.MoveNext());
+ Assert.AreEqual(t1, merged.Current);
+ Assert.IsTrue(merged.MoveNext());
+ Assert.AreEqual(t2, merged.Current);
+ }
+
+ [Test]
+ public virtual void TestMergeRandom()
+ {
+ PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt(Random(), 2, 10)];
+ SortedSet<Term> superSet = new SortedSet<Term>();
+
+ for (int i = 0; i < pb.Length; i++)
+ {
+ SortedSet<Term> terms = new SortedSet<Term>();
+ int nterms = TestUtil.NextInt(Random(), 0, 10000);
+ for (int j = 0; j < nterms; j++)
+ {
+ Term term = new Term(TestUtil.RandomUnicodeString(Random(), 2), TestUtil.RandomUnicodeString(Random(), 4));
+ terms.Add(term);
+ }
+ superSet.AddAll(terms);
+
+ PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+ foreach (Term @ref in terms)
+ {
+ b.Add(@ref);
+ }
+ pb[i] = b.Finish();
+ }
+
+ List<IEnumerator<Term>> subs = new List<IEnumerator<Term>>();
+ for (int i = 0; i < pb.Length; i++)
+ {
+ subs.Add(pb[i].GetEnumerator());
+ }
+
+ IEnumerator<Term> expected = superSet.GetEnumerator();
+ IEnumerator<Term> actual = new MergedIterator<Term>(subs.ToArray());
+ while (actual.MoveNext())
+ {
+ Assert.IsTrue(expected.MoveNext());
+ Assert.AreEqual(expected.Current, actual.Current);
+ }
+ Assert.IsFalse(expected.MoveNext());
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestReaderClosed.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestReaderClosed.cs b/src/Lucene.Net.Tests/Index/TestReaderClosed.cs
new file mode 100644
index 0000000..99df942
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestReaderClosed.cs
@@ -0,0 +1,118 @@
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using NUnit.Framework;
+ using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using Field = Field;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
+ using TermRangeQuery = Lucene.Net.Search.TermRangeQuery;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ [TestFixture]
+ public class TestReaderClosed : LuceneTestCase
+ {
+ private IndexReader Reader;
+ private Directory Dir;
+
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ Dir = NewDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
+
+ Document doc = new Document();
+ Field field = NewStringField("field", "", Field.Store.NO);
+ doc.Add(field);
+
+ // we generate awful prefixes: good for testing.
+ // but for preflex codec, the test can be very slow, so use fewer iterations.
+ int num = AtLeast(10);
+ for (int i = 0; i < num; i++)
+ {
+ field.SetStringValue(TestUtil.RandomUnicodeString(Random(), 10));
+ writer.AddDocument(doc);
+ }
+ Reader = writer.Reader;
+ writer.Dispose();
+ }
+
+ [Test]
+ public virtual void Test()
+ {
+ Assert.IsTrue(Reader.RefCount > 0);
+ IndexSearcher searcher = NewSearcher(Reader);
+ TermRangeQuery query = TermRangeQuery.NewStringRange("field", "a", "z", true, true);
+ searcher.Search(query, 5);
+ Reader.Dispose();
+ try
+ {
+ searcher.Search(query, 5);
+ }
+#pragma warning disable 168
+ catch (AlreadyClosedException ace)
+#pragma warning restore 168
+ {
+ // expected
+ }
+ }
+
+ // LUCENE-3800
+ [Test]
+ public virtual void TestReaderChaining()
+ {
+ Assert.IsTrue(Reader.RefCount > 0);
+ IndexReader wrappedReader = SlowCompositeReaderWrapper.Wrap(Reader);
+ wrappedReader = new ParallelAtomicReader((AtomicReader)wrappedReader);
+
+ IndexSearcher searcher = NewSearcher(wrappedReader);
+ TermRangeQuery query = TermRangeQuery.NewStringRange("field", "a", "z", true, true);
+ searcher.Search(query, 5);
+ Reader.Dispose(); // close original child reader
+ try
+ {
+ searcher.Search(query, 5);
+ }
+ catch (AlreadyClosedException ace)
+ {
+ Assert.AreEqual("this IndexReader cannot be used anymore as one of its child readers was closed", ace.Message);
+ }
+ finally
+ {
+ // shutdown executor: in case of wrap-wrap-wrapping
+ searcher.IndexReader.Dispose();
+ }
+ }
+
+ [TearDown]
+ public override void TearDown()
+ {
+ Dir.Dispose();
+ base.TearDown();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestRollback.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestRollback.cs b/src/Lucene.Net.Tests/Index/TestRollback.cs
new file mode 100644
index 0000000..f613e47
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestRollback.cs
@@ -0,0 +1,67 @@
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using NUnit.Framework;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using Field = Field;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+
+ [TestFixture]
+ public class TestRollback : LuceneTestCase
+ {
+ // LUCENE-2536
+ [Test]
+ public virtual void TestRollbackIntegrityWithBufferFlush()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter rw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ for (int i = 0; i < 5; i++)
+ {
+ Document doc = new Document();
+ doc.Add(NewStringField("pk", Convert.ToString(i), Field.Store.YES));
+ rw.AddDocument(doc);
+ }
+ rw.Dispose();
+
+ // If buffer size is small enough to cause a flush, errors ensue...
+ IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2).SetOpenMode(OpenMode.APPEND));
+
+ for (int i = 0; i < 3; i++)
+ {
+ Document doc = new Document();
+ string value = Convert.ToString(i);
+ doc.Add(NewStringField("pk", value, Field.Store.YES));
+ doc.Add(NewStringField("text", "foo", Field.Store.YES));
+ w.UpdateDocument(new Term("pk", value), doc);
+ }
+ w.Rollback();
+
+ IndexReader r = DirectoryReader.Open(dir);
+ Assert.AreEqual(5, r.NumDocs, "index should contain same number of docs post rollback");
+ r.Dispose();
+ dir.Dispose();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs b/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs
new file mode 100644
index 0000000..8989662
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs
@@ -0,0 +1,285 @@
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using Codecs.Memory;
+ //using MemoryPostingsFormat = Lucene.Net.Codecs.memory.MemoryPostingsFormat;
+
+ using Lucene.Net.Randomized.Generators;
+ using Lucene.Net.Store;
+ using Lucene.Net.Support;
+ using Lucene.Net.Util;
+ using NUnit.Framework;
+ using Codec = Lucene.Net.Codecs.Codec;
+ using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using TermQuery = Lucene.Net.Search.TermQuery;
+ using TopDocs = Lucene.Net.Search.TopDocs;
+
+ [TestFixture]
+ public class TestRollingUpdates : LuceneTestCase
+ {
+ // Just updates the same set of N docs over and over, to
+ // stress out deletions
+
+ [Test]
+ public virtual void TestRollingUpdates_Mem()
+ {
+ Random random = new Random(Random().Next());
+ BaseDirectoryWrapper dir = NewDirectory();
+ LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
+
+ //provider.register(new MemoryCodec());
+ if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
+ {
+ Codec.Default = TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().NextBoolean(), random.NextFloat()));
+ }
+
+ MockAnalyzer analyzer = new MockAnalyzer(Random());
+ analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
+
+ IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+ int SIZE = AtLeast(20);
+ int id = 0;
+ IndexReader r = null;
+ IndexSearcher s = null;
+ int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: numUpdates=" + numUpdates);
+ }
+ int updateCount = 0;
+ // TODO: sometimes update ids not in order...
+ for (int docIter = 0; docIter < numUpdates; docIter++)
+ {
+ Documents.Document doc = docs.NextDoc();
+ string myID = "" + id;
+ if (id == SIZE - 1)
+ {
+ id = 0;
+ }
+ else
+ {
+ id++;
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine(" docIter=" + docIter + " id=" + id);
+ }
+ ((Field)doc.GetField("docid")).SetStringValue(myID);
+
+ Term idTerm = new Term("docid", myID);
+
+ bool doUpdate;
+ if (s != null && updateCount < SIZE)
+ {
+ TopDocs hits = s.Search(new TermQuery(idTerm), 1);
+ Assert.AreEqual(1, hits.TotalHits);
+ doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
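+ // TryDeleteDocument succeeds only while the doc's segment is still live in
+ // the reader's snapshot; if it fails (e.g. merged away), update instead of add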
+ if (VERBOSE)
+ {
+ if (doUpdate)
+ {
+ Console.WriteLine(" tryDeleteDocument failed");
+ }
+ else
+ {
+ Console.WriteLine(" tryDeleteDocument succeeded");
+ }
+ }
+ }
+ else
+ {
+ doUpdate = true;
+ if (VERBOSE)
+ {
+ Console.WriteLine(" no searcher: doUpdate=true");
+ }
+ }
+
+ updateCount++;
+
+ if (doUpdate)
+ {
+ w.UpdateDocument(idTerm, doc);
+ }
+ else
+ {
+ w.AddDocument(doc);
+ }
+
+ if (docIter >= SIZE && Random().Next(50) == 17)
+ {
+ if (r != null)
+ {
+ r.Dispose();
+ }
+
+ bool applyDeletions = Random().NextBoolean();
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
+ }
+
+ r = w.GetReader(applyDeletions);
+ if (applyDeletions)
+ {
+ s = NewSearcher(r);
+ }
+ else
+ {
+ s = null;
+ }
+ Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
+ updateCount = 0;
+ }
+ }
+
+ if (r != null)
+ {
+ r.Dispose();
+ }
+
+ w.Commit();
+ Assert.AreEqual(SIZE, w.NumDocs);
+
+ w.Dispose();
+
+ TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");
+
+ docs.Dispose();
+
+ // LUCENE-4455:
+ SegmentInfos infos = new SegmentInfos();
+ infos.Read(dir);
+ long totalBytes = 0;
+ foreach (SegmentCommitInfo sipc in infos.Segments)
+ {
+ totalBytes += sipc.SizeInBytes();
+ }
+ long totalBytes2 = 0;
+ foreach (string fileName in dir.ListAll())
+ {
+ if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
+ {
+ totalBytes2 += dir.FileLength(fileName);
+ }
+ }
+ Assert.AreEqual(totalBytes2, totalBytes);
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestUpdateSameDoc()
+ {
+ Directory dir = NewDirectory();
+
+ LineFileDocs docs = new LineFileDocs(Random());
+ for (int r = 0; r < 3; r++)
+ {
+ IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
+ int numUpdates = AtLeast(20);
+ int numThreads = TestUtil.NextInt(Random(), 2, 6);
+ IndexingThread[] threads = new IndexingThread[numThreads];
+ for (int i = 0; i < numThreads; i++)
+ {
+ threads[i] = new IndexingThread(docs, w, numUpdates, NewStringField);
+ threads[i].Start();
+ }
+
+ for (int i = 0; i < numThreads; i++)
+ {
+ threads[i].Join();
+ }
+
+ w.Dispose();
+ }
+
+ IndexReader open = DirectoryReader.Open(dir);
+ Assert.AreEqual(1, open.NumDocs);
+ open.Dispose();
+ docs.Dispose();
+ dir.Dispose();
+ }
+
+ internal class IndexingThread : ThreadClass
+ {
+ internal readonly LineFileDocs Docs;
+ internal readonly IndexWriter Writer;
+ internal readonly int Num;
+
+ private readonly Func<string, string, Field.Store, Field> NewStringField;
+
+ /// <param name="newStringField">
+ /// LUCENENET specific
+ /// Passed in because <see cref="LuceneTestCase.NewStringField(string, string, Field.Store)"/>
+ /// is no longer static.
+ /// </param>
+ public IndexingThread(LineFileDocs docs, IndexWriter writer, int num, Func<string, string, Field.Store, Field> newStringField)
+ : base()
+ {
+ this.Docs = docs;
+ this.Writer = writer;
+ this.Num = num;
+ NewStringField = newStringField;
+ }
+
+ public override void Run()
+ {
+ try
+ {
+ DirectoryReader open = null;
+ for (int i = 0; i < Num; i++)
+ {
+ Documents.Document doc = new Documents.Document(); // docs.NextDoc();
+ doc.Add(NewStringField("id", "test", Field.Store.NO));
+ Writer.UpdateDocument(new Term("id", "test"), doc);
+ if (Random().Next(3) == 0)
+ {
+ if (open == null)
+ {
+ open = DirectoryReader.Open(Writer, true);
+ }
+ DirectoryReader reader = DirectoryReader.OpenIfChanged(open);
+ if (reader != null)
+ {
+ open.Dispose();
+ open = reader;
+ }
+ Assert.AreEqual(1, open.NumDocs, "iter: " + i + " numDocs: " + open.NumDocs + " del: " + open.NumDeletedDocs + " max: " + open.MaxDoc);
+ }
+ }
+ if (open != null)
+ {
+ open.Dispose();
+ }
+ }
+ catch (Exception e)
+ {
+ throw new Exception(e.Message, e);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs
new file mode 100644
index 0000000..ca9637a
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs
@@ -0,0 +1,110 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Documents;
+using NUnit.Framework;
+
+namespace Lucene.Net.Index
+{
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using TextField = TextField;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+ [TestFixture]
+ public class TestSameTokenSamePosition : LuceneTestCase
+ {
+ /// <summary>
+ /// Attempt to reproduce an assertion error that happens
+ /// only with the trunk version around April 2011.
+ /// </summary>
+ [Test]
+ public virtual void Test()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ Document doc = new Document();
+ doc.Add(new TextField("eng", new BugReproTokenStream()));
+ riw.AddDocument(doc);
+ riw.Dispose();
+ dir.Dispose();
+ }
+
+ /// <summary>
+ /// Same as the above, but with more docs
+ /// </summary>
+ [Test]
+ public virtual void TestMoreDocs()
+ {
+ Directory dir = NewDirectory();
+ RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+ for (int i = 0; i < 100; i++)
+ {
+ Document doc = new Document();
+ doc.Add(new TextField("eng", new BugReproTokenStream()));
+ riw.AddDocument(doc);
+ }
+ riw.Dispose();
+ dir.Dispose();
+ }
+ }
+
+ internal sealed class BugReproTokenStream : TokenStream
+ {
+ private readonly ICharTermAttribute TermAtt;
+ private readonly IOffsetAttribute OffsetAtt;
+ private readonly IPositionIncrementAttribute PosIncAtt;
+ private readonly int TokenCount = 4;
+ private int NextTokenIndex = 0;
+ private readonly string[] Terms = new string[] { "six", "six", "drunken", "drunken" };
+ private readonly int[] Starts = new int[] { 0, 0, 4, 4 };
+ private readonly int[] Ends = new int[] { 3, 3, 11, 11 };
+ private readonly int[] Incs = new int[] { 1, 0, 1, 0 };
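+ // posIncr=0 stacks the second "six" and second "drunken" at the same
+ // positions and offsets as the first, the condition that triggered the bug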
+
+ public BugReproTokenStream()
+ {
+ TermAtt = AddAttribute<ICharTermAttribute>();
+ OffsetAtt = AddAttribute<IOffsetAttribute>();
+ PosIncAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ public override bool IncrementToken()
+ {
+ if (NextTokenIndex < TokenCount)
+ {
+ TermAtt.SetEmpty().Append(Terms[NextTokenIndex]);
+ OffsetAtt.SetOffset(Starts[NextTokenIndex], Ends[NextTokenIndex]);
+ PosIncAtt.PositionIncrement = Incs[NextTokenIndex];
+ NextTokenIndex++;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ this.NextTokenIndex = 0;
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs b/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs
new file mode 100644
index 0000000..30786b5
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs
@@ -0,0 +1,207 @@
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+ using NUnit.Framework;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Codec = Lucene.Net.Codecs.Codec;
+ using Constants = Lucene.Net.Util.Constants;
+ using Directory = Lucene.Net.Store.Directory;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Document = Documents.Document;
+ using FixedBitSet = Lucene.Net.Util.FixedBitSet;
+ using InfoStream = Lucene.Net.Util.InfoStream;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ [TestFixture]
+ public class TestSegmentMerger : LuceneTestCase
+ {
+ //The variables for the new merged segment
+ private Directory MergedDir;
+
+ private string MergedSegment = "test";
+
+ //First segment to be merged
+ private Directory Merge1Dir;
+
+ private Document Doc1;
+ private SegmentReader Reader1;
+
+ //Second Segment to be merged
+ private Directory Merge2Dir;
+
+ private Document Doc2;
+ private SegmentReader Reader2;
+
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ this.Doc1 = new Document();
+ this.Doc2 = new Document();
+ MergedDir = NewDirectory();
+ Merge1Dir = NewDirectory();
+ Merge2Dir = NewDirectory();
+ DocHelper.SetupDoc(Doc1);
+ SegmentCommitInfo info1 = DocHelper.WriteDoc(Random(), Merge1Dir, Doc1);
+ DocHelper.SetupDoc(Doc2);
+ SegmentCommitInfo info2 = DocHelper.WriteDoc(Random(), Merge2Dir, Doc2);
+ Reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
+ Reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
+ }
+
+ [TearDown]
+ public override void TearDown()
+ {
+ Reader1.Dispose();
+ Reader2.Dispose();
+ MergedDir.Dispose();
+ Merge1Dir.Dispose();
+ Merge2Dir.Dispose();
+ base.TearDown();
+ }
+
+ [Test]
+ public virtual void Test()
+ {
+ Assert.IsTrue(MergedDir != null);
+ Assert.IsTrue(Merge1Dir != null);
+ Assert.IsTrue(Merge2Dir != null);
+ Assert.IsTrue(Reader1 != null);
+ Assert.IsTrue(Reader2 != null);
+ }
+
+ [Test]
+ public virtual void TestMerge()
+ {
+ Codec codec = Codec.Default;
+ SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);
+
+ SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true);
+ MergeState mergeState = merger.Merge();
+ int docsMerged = mergeState.SegmentInfo.DocCount;
+ Assert.IsTrue(docsMerged == 2);
+ //Should be able to open a new SegmentReader against the new directory
+ SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
+ Assert.IsTrue(mergedReader != null);
+ Assert.IsTrue(mergedReader.NumDocs == 2);
+ Document newDoc1 = mergedReader.Document(0);
+ Assert.IsTrue(newDoc1 != null);
+ //There are 2 unstored fields on the document
+ Assert.IsTrue(DocHelper.NumFields(newDoc1) == DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count);
+ Document newDoc2 = mergedReader.Document(1);
+ Assert.IsTrue(newDoc2 != null);
+ Assert.IsTrue(DocHelper.NumFields(newDoc2) == DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count);
+
+ DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);
+ Assert.IsTrue(termDocs != null);
+ Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+
+ int tvCount = 0;
+ foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
+ {
+ if (fieldInfo.HasVectors)
+ {
+ tvCount++;
+ }
+ }
+
+ //System.out.println("stored size: " + stored.Size());
+ Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");
+
+ Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);
+ Assert.IsNotNull(vector);
+ Assert.AreEqual(3, vector.Count);
+ TermsEnum termsEnum = vector.GetIterator(null);
+
+ int i = 0;
+ while (termsEnum.Next() != null)
+ {
+ string term = termsEnum.Term.Utf8ToString();
+ int freq = (int)termsEnum.TotalTermFreq;
+ //System.out.println("Term: " + term + " Freq: " + freq);
+ Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
+ Assert.IsTrue(DocHelper.FIELD_2_FREQS[i] == freq);
+ i++;
+ }
+
+ TestSegmentReader.CheckNorms(mergedReader);
+ mergedReader.Dispose();
+ }
+
+ private static bool Equals(MergeState.DocMap map1, MergeState.DocMap map2)
+ {
+ if (map1.MaxDoc != map2.MaxDoc)
+ {
+ return false;
+ }
+ for (int i = 0; i < map1.MaxDoc; ++i)
+ {
+ if (map1.Get(i) != map2.Get(i))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ [Test]
+ public virtual void TestBuildDocMap()
+ {
+ int maxDoc = TestUtil.NextInt(Random(), 1, 128);
+ int numDocs = TestUtil.NextInt(Random(), 0, maxDoc);
+ int numDeletedDocs = maxDoc - numDocs;
+ FixedBitSet liveDocs = new FixedBitSet(maxDoc);
+ for (int i = 0; i < numDocs; ++i)
+ {
+ while (true)
+ {
+ int docID = Random().Next(maxDoc);
+ if (!liveDocs.Get(docID))
+ {
+ liveDocs.Set(docID);
+ break;
+ }
+ }
+ }
+
+ MergeState.DocMap docMap = MergeState.DocMap.Build(maxDoc, liveDocs);
+
+ Assert.AreEqual(maxDoc, docMap.MaxDoc);
+ Assert.AreEqual(numDocs, docMap.NumDocs);
+ Assert.AreEqual(numDeletedDocs, docMap.NumDeletedDocs);
+ // assert the mapping is compact
+ for (int i = 0, del = 0; i < maxDoc; ++i)
+ {
+ if (!liveDocs.Get(i))
+ {
+ Assert.AreEqual(-1, docMap.Get(i));
+ ++del;
+ }
+ else
+ {
+ Assert.AreEqual(i - del, docMap.Get(i));
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentReader.cs b/src/Lucene.Net.Tests/Index/TestSegmentReader.cs
new file mode 100644
index 0000000..b98287d
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentReader.cs
@@ -0,0 +1,277 @@
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+ using NUnit.Framework;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using Directory = Lucene.Net.Store.Directory;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Document = Documents.Document;
+ using IOContext = Lucene.Net.Store.IOContext;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ [TestFixture]
+ public class TestSegmentReader : LuceneTestCase
+ {
+ private Directory Dir;
+ private Document TestDoc;
+ private SegmentReader Reader;
+
+ //TODO: Setup the reader w/ multiple documents
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ Dir = NewDirectory();
+ TestDoc = new Document();
+ DocHelper.SetupDoc(TestDoc);
+ SegmentCommitInfo info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
+ Reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ);
+ }
+
+ [TearDown]
+ public override void TearDown()
+ {
+ Reader.Dispose();
+ Dir.Dispose();
+ base.TearDown();
+ }
+
+ [Test]
+ public virtual void Test()
+ {
+ Assert.IsTrue(Dir != null);
+ Assert.IsTrue(Reader != null);
+ Assert.IsTrue(DocHelper.NameValues.Count > 0);
+ Assert.IsTrue(DocHelper.NumFields(TestDoc) == DocHelper.All.Count);
+ }
+
+ [Test]
+ public virtual void TestDocument()
+ {
+ Assert.IsTrue(Reader.NumDocs == 1);
+ Assert.IsTrue(Reader.MaxDoc >= 1);
+ Document result = Reader.Document(0);
+ Assert.IsTrue(result != null);
+ //There are 2 unstored fields on the document that are not preserved across writing
+ Assert.IsTrue(DocHelper.NumFields(result) == DocHelper.NumFields(TestDoc) - DocHelper.Unstored.Count);
+
+ IList<IIndexableField> fields = result.Fields;
+ foreach (IIndexableField field in fields)
+ {
+ Assert.IsTrue(field != null);
+ Assert.IsTrue(DocHelper.NameValues.ContainsKey(field.Name));
+ }
+ }
+
+ [Test]
+ public virtual void TestGetFieldNameVariations()
+ {
+ ICollection<string> allFieldNames = new HashSet<string>();
+ ICollection<string> indexedFieldNames = new HashSet<string>();
+ ICollection<string> notIndexedFieldNames = new HashSet<string>();
+ ICollection<string> tvFieldNames = new HashSet<string>();
+ ICollection<string> noTVFieldNames = new HashSet<string>();
+
+ foreach (FieldInfo fieldInfo in Reader.FieldInfos)
+ {
+ string name = fieldInfo.Name;
+ allFieldNames.Add(name);
+ if (fieldInfo.IsIndexed)
+ {
+ indexedFieldNames.Add(name);
+ }
+ else
+ {
+ notIndexedFieldNames.Add(name);
+ }
+ if (fieldInfo.HasVectors)
+ {
+ tvFieldNames.Add(name);
+ }
+ else if (fieldInfo.IsIndexed)
+ {
+ noTVFieldNames.Add(name);
+ }
+ }
+
+ Assert.IsTrue(allFieldNames.Count == DocHelper.All.Count);
+ foreach (string s in allFieldNames)
+ {
+ Assert.IsTrue(DocHelper.NameValues.ContainsKey(s) == true || s.Equals(""));
+ }
+
+ Assert.IsTrue(indexedFieldNames.Count == DocHelper.Indexed.Count);
+ foreach (string s in indexedFieldNames)
+ {
+ Assert.IsTrue(DocHelper.Indexed.ContainsKey(s) == true || s.Equals(""));
+ }
+
+ Assert.IsTrue(notIndexedFieldNames.Count == DocHelper.Unindexed.Count);
+ //Get all indexed fields that are storing term vectors
+ Assert.IsTrue(tvFieldNames.Count == DocHelper.Termvector.Count);
+
+ Assert.IsTrue(noTVFieldNames.Count == DocHelper.Notermvector.Count);
+ }
+
+ [Test]
+ public virtual void TestTerms()
+ {
+ Fields fields = MultiFields.GetFields(Reader);
+ foreach (string field in fields)
+ {
+ Terms terms = fields.GetTerms(field);
+ Assert.IsNotNull(terms);
+ TermsEnum termsEnum = terms.GetIterator(null);
+ while (termsEnum.Next() != null)
+ {
+ BytesRef term = termsEnum.Term;
+ Assert.IsTrue(term != null);
+ string fieldValue = (string)DocHelper.NameValues[field];
+ Assert.IsTrue(fieldValue.IndexOf(term.Utf8ToString()) != -1);
+ }
+ }
+
+ DocsEnum termDocs = TestUtil.Docs(Random(), Reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(Reader), null, 0);
+ Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+
+ termDocs = TestUtil.Docs(Random(), Reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), MultiFields.GetLiveDocs(Reader), null, 0);
+
+ Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+
+ DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(Reader, MultiFields.GetLiveDocs(Reader), DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
+ // NOTE: prior rev of this test was failing to first
+ // call next here:
+ Assert.IsTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.IsTrue(positions.DocID == 0);
+ Assert.IsTrue(positions.NextPosition() >= 0);
+ }
+
+ [Test]
+ public virtual void TestNorms()
+ {
+ //TODO: Not sure how these work/should be tested
+ /*
+ try {
+ byte [] norms = reader.norms(DocHelper.TEXT_FIELD_1_KEY);
+ System.out.println("Norms: " + norms);
+ Assert.IsTrue(norms != null);
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.IsTrue(false);
+ }
+ */
+
+ CheckNorms(Reader);
+ }
+
+ public static void CheckNorms(AtomicReader reader)
+ {
+ // test omit norms
+ for (int i = 0; i < DocHelper.Fields.Length; i++)
+ {
+ IIndexableField f = DocHelper.Fields[i];
+ if (f.FieldType.IsIndexed)
+ {
+ Assert.AreEqual(reader.GetNormValues(f.Name) != null, !f.FieldType.OmitNorms);
+ Assert.AreEqual(reader.GetNormValues(f.Name) != null, !DocHelper.NoNorms.ContainsKey(f.Name));
+ if (reader.GetNormValues(f.Name) == null)
+ {
+ // test for norms of null
+ NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name);
+ Assert.IsNull(norms);
+ }
+ }
+ }
+ }
+
+ [Test]
+ public virtual void TestTermVectors()
+ {
+ Terms result = Reader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);
+ Assert.IsNotNull(result);
+ Assert.AreEqual(3, result.Count);
+ TermsEnum termsEnum = result.GetIterator(null);
+ while (termsEnum.Next() != null)
+ {
+ string term = termsEnum.Term.Utf8ToString();
+ int freq = (int)termsEnum.TotalTermFreq;
+ Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term) != -1);
+ Assert.IsTrue(freq > 0);
+ }
+
+ Fields results = Reader.GetTermVectors(0);
+ Assert.IsTrue(results != null);
+ Assert.AreEqual(3, results.Count, "We do not have 3 term freq vectors");
+ }
+
+ [Test]
+ public virtual void TestOutOfBoundsAccess()
+ {
+ int numDocs = Reader.MaxDoc;
+ try
+ {
+ Reader.Document(-1);
+ Assert.Fail();
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ // expected
+ }
+
+ try
+ {
+ Reader.GetTermVectors(-1);
+ Assert.Fail();
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ // expected
+ }
+
+ try
+ {
+ Reader.Document(numDocs);
+ Assert.Fail();
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ // expected
+ }
+
+ try
+ {
+ Reader.GetTermVectors(numDocs);
+ Assert.Fail();
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ // expected
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs b/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
new file mode 100644
index 0000000..f876774
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
@@ -0,0 +1,274 @@
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using NUnit.Framework;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using Directory = Lucene.Net.Store.Directory;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Document = Documents.Document;
+ using Field = Field;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ [TestFixture]
+ public class TestSegmentTermDocs : LuceneTestCase
+ {
+ private Document TestDoc;
+ private Directory Dir;
+ private SegmentCommitInfo Info;
+
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ TestDoc = new Document();
+ Dir = NewDirectory();
+ DocHelper.SetupDoc(TestDoc);
+ Info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
+ }
+
+ [TearDown]
+ public override void TearDown()
+ {
+ Dir.Dispose();
+ base.TearDown();
+ }
+
+ [Test]
+ public virtual void Test()
+ {
+ Assert.IsNotNull(Dir);
+ }
+
+ [Test]
+ public virtual void TestTermDocs()
+ {
+ TestTermDocs(1);
+ }
+
+ public virtual void TestTermDocs(int indexDivisor)
+ {
+ // After adding the document, we should be able to read it back in
+ SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
+ Assert.IsNotNull(reader);
+ Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor);
+
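+ // textField2 contains the term "field" three times, so it should be found in doc 0 with a freq of 3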
+ TermsEnum terms = reader.Fields.GetTerms(DocHelper.TEXT_FIELD_2_KEY).GetIterator(null);
+ terms.SeekCeil(new BytesRef("field"));
+ DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS);
+ Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(0, termDocs.DocID);
+ Assert.AreEqual(3, termDocs.Freq);
+ reader.Dispose();
+ }
+
+ [Test]
+ public virtual void TestBadSeek()
+ {
+ TestBadSeek(1);
+ }
+
+ public virtual void TestBadSeek(int indexDivisor)
+ {
+ {
+ // seeking to a term that does not occur in the field should produce no docs enum
+ SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
+ Assert.IsNotNull(reader);
+ DocsEnum termDocs = TestUtil.Docs(Random(), reader, "textField2", new BytesRef("bad"), reader.LiveDocs, null, 0);
+
+ Assert.IsNull(termDocs);
+ reader.Dispose();
+ }
+ {
+ // seeking within a field that does not exist at all should likewise produce no docs enum
+ SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
+ Assert.IsNotNull(reader);
+ DocsEnum termDocs = TestUtil.Docs(Random(), reader, "junk", new BytesRef("bad"), reader.LiveDocs, null, 0);
+ Assert.IsNull(termDocs);
+ reader.Dispose();
+ }
+ }
+
+ [Test]
+ public virtual void TestSkipTo()
+ {
+ TestSkipTo(1);
+ }
+
+ public virtual void TestSkipTo(int indexDivisor)
+ {
+ Directory dir = NewDirectory();
+ IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
+
+ Term ta = new Term("content", "aaa");
+ for (int i = 0; i < 10; i++)
+ {
+ AddDoc(writer, "aaa aaa aaa aaa");
+ }
+
+ Term tb = new Term("content", "bbb");
+ for (int i = 0; i < 16; i++)
+ {
+ AddDoc(writer, "bbb bbb bbb bbb");
+ }
+
+ Term tc = new Term("content", "ccc");
+ for (int i = 0; i < 50; i++)
+ {
+ AddDoc(writer, "ccc ccc ccc ccc");
+ }
+
+ // ensure we end up with a single segment
+ writer.ForceMerge(1);
+ writer.Dispose();
+
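+ // after the merge the docs are contiguous: "aaa" in 0-9, "bbb" in 10-25, "ccc" in 26-75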
+ IndexReader reader = DirectoryReader.Open(dir, indexDivisor);
+
+ DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field, new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+ // without optimization (assumption skipInterval == 16)
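+ // "aaa" appears in only 10 docs, fewer than skipInterval, so Advance() cannot skip and must scan linearly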
+
+ // with next
+ Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(0, tdocs.DocID);
+ Assert.AreEqual(4, tdocs.Freq);
+ Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(1, tdocs.DocID);
+ Assert.AreEqual(4, tdocs.Freq);
+ Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(2, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(4, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(9, tdocs.DocID);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tdocs.Advance(10));
+
+ // without next
+ tdocs = TestUtil.Docs(Random(), reader, ta.Field, new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);
+
+ Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(0, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(4, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(9, tdocs.DocID);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tdocs.Advance(10));
+
+ // exactly skipInterval documents and therefore with optimization
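+ // "bbb" occupies docs 10-25, exactly 16 docs, so skip data is available to Advance()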
+
+ // with next
+ tdocs = TestUtil.Docs(Random(), reader, tb.Field, new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+ Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(10, tdocs.DocID);
+ Assert.AreEqual(4, tdocs.Freq);
+ Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(11, tdocs.DocID);
+ Assert.AreEqual(4, tdocs.Freq);
+ Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(12, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(15, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(24, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(25, tdocs.DocID);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tdocs.Advance(26));
+
+ // without next
+ tdocs = TestUtil.Docs(Random(), reader, tb.Field, new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+ Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(10, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(15, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(24, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(25, tdocs.DocID);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tdocs.Advance(26));
+
+ // much more than skipInterval documents and therefore with optimization
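+ // "ccc" occupies docs 26-75 (50 docs), spanning several skip intervals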
+
+ // with next
+ tdocs = TestUtil.Docs(Random(), reader, tc.Field, new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
+
+ Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(26, tdocs.DocID);
+ Assert.AreEqual(4, tdocs.Freq);
+ Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(27, tdocs.DocID);
+ Assert.AreEqual(4, tdocs.Freq);
+ Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(28, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(40, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(57, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(74, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(75, tdocs.DocID);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tdocs.Advance(76));
+
+ // without next
+ tdocs = TestUtil.Docs(Random(), reader, tc.Field, new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
+ Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(26, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(40, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(57, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(74, tdocs.DocID);
+ Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
+ Assert.AreEqual(75, tdocs.DocID);
+ Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tdocs.Advance(76));
+
+ reader.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestIndexDivisor()
+ {
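+ // write a second copy of the test doc, then re-run the term docs, bad seek, and skip-to checks with an index divisor of 2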
+ TestDoc = new Document();
+ DocHelper.SetupDoc(TestDoc);
+ DocHelper.WriteDoc(Random(), Dir, TestDoc);
+ TestTermDocs(2);
+ TestBadSeek(2);
+ TestSkipTo(2);
+ }
+
+ private void AddDoc(IndexWriter writer, string value)
+ {
+ Document doc = new Document();
+ doc.Add(NewTextField("content", value, Field.Store.NO));
+ writer.AddDocument(doc);
+ }
+ }
+}
\ No newline at end of file