Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/26 23:37:13 UTC
[25/72] [abbrv] [partial] lucenenet git commit: Lucene.Net.Tests: Removed \core directory and put its contents in root directory
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestLogMergePolicy.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestLogMergePolicy.cs b/src/Lucene.Net.Tests/Index/TestLogMergePolicy.cs
new file mode 100644
index 0000000..921b719
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestLogMergePolicy.cs
@@ -0,0 +1,42 @@
+using NUnit.Framework;
+
+namespace Lucene.Net.Index
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ public class TestLogMergePolicy : BaseMergePolicyTestCase
+ {
+ protected internal override MergePolicy MergePolicy()
+ {
+ return NewLogMergePolicy(Random());
+ }
+
+ #region BaseMergePolicyTestCase
+ // LUCENENET NOTE: Tests in an abstract base class are not pulled into the correct
+ // context in Visual Studio. This fixes that with the minimum amount of code necessary
+ // to run them in the correct context without duplicating all of the tests.
+
+ [Test]
+ public override void TestForceMergeNotNeeded()
+ {
+ base.TestForceMergeNotNeeded();
+ }
+
+ #endregion
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestLongPostings.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestLongPostings.cs b/src/Lucene.Net.Tests/Index/TestLongPostings.cs
new file mode 100644
index 0000000..0f06912
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestLongPostings.cs
@@ -0,0 +1,570 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using System;
+using System.Diagnostics;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using Lucene.Net.Randomized.Generators;
+ using NUnit.Framework;
+ using System.IO;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Analyzer = Lucene.Net.Analysis.Analyzer;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using Directory = Lucene.Net.Store.Directory;
+ using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+ using Document = Documents.Document;
+ using Field = Field;
+ using FieldType = FieldType;
+ using FixedBitSet = Lucene.Net.Util.FixedBitSet;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+ using TextField = TextField;
+ using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+ [SuppressCodecs("SimpleText", "Memory", "Direct")]
+ [TestFixture]
+ public class TestLongPostings : LuceneTestCase
+ {
+ // Produces a realistic unicode random string that
+ // survives MockAnalyzer unchanged:
+ private string GetRandomTerm(string other)
+ {
+ Analyzer a = new MockAnalyzer(Random());
+ while (true)
+ {
+ string s = TestUtil.RandomRealisticUnicodeString(Random());
+ if (other != null && s.Equals(other))
+ {
+ continue;
+ }
+ IOException priorException = null;
+ TokenStream ts = a.TokenStream("foo", new StringReader(s));
+ try
+ {
+ ITermToBytesRefAttribute termAtt = ts.GetAttribute<ITermToBytesRefAttribute>();
+ BytesRef termBytes = termAtt.BytesRef;
+ ts.Reset();
+
+ int count = 0;
+ bool changed = false;
+
+ while (ts.IncrementToken())
+ {
+ termAtt.FillBytesRef();
+ if (count == 0 && !termBytes.Utf8ToString().Equals(s))
+ {
+ // The value was changed during analysis. Keep iterating so the
+ // tokenStream is exhausted.
+ changed = true;
+ }
+ count++;
+ }
+
+ ts.End();
+ // Did we iterate just once and the value was unchanged?
+ if (!changed && count == 1)
+ {
+ return s;
+ }
+ }
+ catch (IOException e)
+ {
+ priorException = e;
+ }
+ finally
+ {
+ IOUtils.CloseWhileHandlingException(priorException, ts);
+ }
+ }
+ }
+
+ [Test]
+ public virtual void TestLongPostings_Mem()
+ {
+ // Don't use TestUtil.getTempDir so that we own the
+ // randomness (i.e. the same seed will point to the same dir):
+ Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong()));
+
+ int NUM_DOCS = AtLeast(2000);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
+ }
+
+ string s1 = GetRandomTerm(null);
+ string s2 = GetRandomTerm(s1);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
+ /*
+ for(int idx=0;idx<s1.Length();idx++) {
+ System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
+ }
+ for(int idx=0;idx<s2.Length();idx++) {
+ System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
+ }
+ */
+ }
+
+ FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
+ for (int idx = 0; idx < NUM_DOCS; idx++)
+ {
+ if (Random().NextBoolean())
+ {
+ isS1.Set(idx);
+ }
+ }
+
+ IndexReader r;
+ IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
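+ // Flush by RAM usage only: pick a random 16-32 MB buffer and disable
+ // doc-count based flushing (-1 == IndexWriterConfig.DISABLE_AUTO_FLUSH).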
+ iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble());
+ iwc.SetMaxBufferedDocs(-1);
+ RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc);
+
+ for (int idx = 0; idx < NUM_DOCS; idx++)
+ {
+ Document doc = new Document();
+ string s = isS1.Get(idx) ? s1 : s2;
+ Field f = NewTextField("field", s, Field.Store.NO);
+ int count = TestUtil.NextInt(Random(), 1, 4);
+ for (int ct = 0; ct < count; ct++)
+ {
+ doc.Add(f);
+ }
+ riw.AddDocument(doc);
+ }
+
+ r = riw.Reader;
+ riw.Dispose();
+
+ /*
+ if (VERBOSE) {
+ System.out.println("TEST: terms");
+ TermEnum termEnum = r.Terms();
+ while(termEnum.Next()) {
+ System.out.println(" term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
+ Assert.IsTrue(termEnum.DocFreq() > 0);
+ System.out.println(" s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
+ System.out.println(" s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
+ final String s = termEnum.Term().Text();
+ for(int idx=0;idx<s.Length();idx++) {
+ System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx)));
+ }
+ }
+ }
+ */
+
+ Assert.AreEqual(NUM_DOCS, r.NumDocs);
+ Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
+ Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);
+
+ int num = AtLeast(1000);
+ for (int iter = 0; iter < num; iter++)
+ {
+ string term;
+ bool doS1;
+ if (Random().NextBoolean())
+ {
+ term = s1;
+ doS1 = true;
+ }
+ else
+ {
+ term = s2;
+ doS1 = false;
+ }
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
+ }
+
+ DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));
+
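+ // Random walk over the postings enum: each step randomly picks NextDoc()
+ // or Advance(), and the expected doc is recomputed from the isS1 bitset.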
+ int docID = -1;
+ while (docID < DocIdSetIterator.NO_MORE_DOCS)
+ {
+ int what = Random().Next(3);
+ if (what == 0)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: docID=" + docID + "; do next()");
+ }
+ // nextDoc
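+ // Scan forward for the next doc whose s1/s2 assignment matches this
+ // term; reaching NUM_DOCS means the enum is exhausted (NO_MORE_DOCS).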
+ int expected = docID + 1;
+ while (true)
+ {
+ if (expected == NUM_DOCS)
+ {
+ expected = int.MaxValue;
+ break;
+ }
+ else if (isS1.Get(expected) == doS1)
+ {
+ break;
+ }
+ else
+ {
+ expected++;
+ }
+ }
+ docID = postings.NextDoc();
+ if (VERBOSE)
+ {
+ Console.WriteLine(" got docID=" + docID);
+ }
+ Assert.AreEqual(expected, docID);
+ if (docID == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ break;
+ }
+
+ if (Random().Next(6) == 3)
+ {
+ int freq = postings.Freq;
+ Assert.IsTrue(freq >= 1 && freq <= 4);
+ for (int pos = 0; pos < freq; pos++)
+ {
+ Assert.AreEqual(pos, postings.NextPosition());
+ if (Random().NextBoolean())
+ {
+ var dummy = postings.GetPayload();
+ if (Random().NextBoolean())
+ {
+ dummy = postings.GetPayload(); // get it again
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // advance
+ int targetDocID;
+ if (docID == -1)
+ {
+ targetDocID = Random().Next(NUM_DOCS + 1);
+ }
+ else
+ {
+ targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID);
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
+ }
+ int expected = targetDocID;
+ while (true)
+ {
+ if (expected == NUM_DOCS)
+ {
+ expected = int.MaxValue;
+ break;
+ }
+ else if (isS1.Get(expected) == doS1)
+ {
+ break;
+ }
+ else
+ {
+ expected++;
+ }
+ }
+
+ docID = postings.Advance(targetDocID);
+ if (VERBOSE)
+ {
+ Console.WriteLine(" got docID=" + docID);
+ }
+ Assert.AreEqual(expected, docID);
+ if (docID == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ break;
+ }
+
+ if (Random().Next(6) == 3)
+ {
+ int freq = postings.Freq;
+ Assert.IsTrue(freq >= 1 && freq <= 4);
+ for (int pos = 0; pos < freq; pos++)
+ {
+ Assert.AreEqual(pos, postings.NextPosition());
+ if (Random().NextBoolean())
+ {
+ var dummy = postings.GetPayload();
+ if (Random().NextBoolean())
+ {
+ dummy = postings.GetPayload(); // get it again
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ r.Dispose();
+ dir.Dispose();
+ }
+
+ // A weaker form of TestLongPostings_Mem that doesn't check positions.
+ [Test]
+ public virtual void TestLongPostingsNoPositions()
+ {
+ DoTestLongPostingsNoPositions(IndexOptions.DOCS_ONLY);
+ DoTestLongPostingsNoPositions(IndexOptions.DOCS_AND_FREQS);
+ }
+
+ public virtual void DoTestLongPostingsNoPositions(IndexOptions options)
+ {
+ // Don't use TestUtil.getTempDir so that we own the
+ // randomness (i.e. the same seed will point to the same dir):
+ Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong()));
+
+ int NUM_DOCS = AtLeast(2000);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
+ }
+
+ string s1 = GetRandomTerm(null);
+ string s2 = GetRandomTerm(s1);
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
+ /*
+ for(int idx=0;idx<s1.Length();idx++) {
+ System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
+ }
+ for(int idx=0;idx<s2.Length();idx++) {
+ System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
+ }
+ */
+ }
+
+ FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
+ for (int idx = 0; idx < NUM_DOCS; idx++)
+ {
+ if (Random().NextBoolean())
+ {
+ isS1.Set(idx);
+ }
+ }
+
+ IndexReader r;
+ if (true)
+ {
+ IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
+ iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble());
+ iwc.SetMaxBufferedDocs(-1);
+ RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc);
+
+ FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+ ft.IndexOptions = options;
+ for (int idx = 0; idx < NUM_DOCS; idx++)
+ {
+ Document doc = new Document();
+ string s = isS1.Get(idx) ? s1 : s2;
+ Field f = NewField("field", s, ft);
+ int count = TestUtil.NextInt(Random(), 1, 4);
+ for (int ct = 0; ct < count; ct++)
+ {
+ doc.Add(f);
+ }
+ riw.AddDocument(doc);
+ }
+
+ r = riw.Reader;
+ riw.Dispose();
+ }
+ else
+ {
+#pragma warning disable 162
+ r = DirectoryReader.Open(dir);
+#pragma warning restore 162
+ }
+
+ /*
+ if (VERBOSE) {
+ System.out.println("TEST: terms");
+ TermEnum termEnum = r.Terms();
+ while(termEnum.Next()) {
+ System.out.println(" term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
+ Assert.IsTrue(termEnum.DocFreq() > 0);
+ System.out.println(" s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
+ System.out.println(" s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
+ final String s = termEnum.Term().Text();
+ for(int idx=0;idx<s.Length();idx++) {
+ System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx)));
+ }
+ }
+ }
+ */
+
+ Assert.AreEqual(NUM_DOCS, r.NumDocs);
+ Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
+ Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);
+
+ int num = AtLeast(1000);
+ for (int iter = 0; iter < num; iter++)
+ {
+ string term;
+ bool doS1;
+ if (Random().NextBoolean())
+ {
+ term = s1;
+ doS1 = true;
+ }
+ else
+ {
+ term = s2;
+ doS1 = false;
+ }
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
+ }
+
+ DocsEnum docs;
+ DocsEnum postings;
+
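+ // With DOCS_ONLY no freqs are indexed, so postings stays null and only
+ // docs is iterated; otherwise a single enum serves both and Freq is
+ // spot-checked below.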
+ if (options == IndexOptions.DOCS_ONLY)
+ {
+ docs = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_NONE);
+ postings = null;
+ }
+ else
+ {
+ docs = postings = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_FREQS);
+ Debug.Assert(postings != null);
+ }
+ Debug.Assert(docs != null);
+
+ int docID = -1;
+ while (docID < DocIdSetIterator.NO_MORE_DOCS)
+ {
+ int what = Random().Next(3);
+ if (what == 0)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: docID=" + docID + "; do next()");
+ }
+ // nextDoc
+ int expected = docID + 1;
+ while (true)
+ {
+ if (expected == NUM_DOCS)
+ {
+ expected = int.MaxValue;
+ break;
+ }
+ else if (isS1.Get(expected) == doS1)
+ {
+ break;
+ }
+ else
+ {
+ expected++;
+ }
+ }
+ docID = docs.NextDoc();
+ if (VERBOSE)
+ {
+ Console.WriteLine(" got docID=" + docID);
+ }
+ Assert.AreEqual(expected, docID);
+ if (docID == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ break;
+ }
+
+ if (Random().Next(6) == 3 && postings != null)
+ {
+ int freq = postings.Freq;
+ Assert.IsTrue(freq >= 1 && freq <= 4);
+ }
+ }
+ else
+ {
+ // advance
+ int targetDocID;
+ if (docID == -1)
+ {
+ targetDocID = Random().Next(NUM_DOCS + 1);
+ }
+ else
+ {
+ targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID);
+ }
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
+ }
+ int expected = targetDocID;
+ while (true)
+ {
+ if (expected == NUM_DOCS)
+ {
+ expected = int.MaxValue;
+ break;
+ }
+ else if (isS1.Get(expected) == doS1)
+ {
+ break;
+ }
+ else
+ {
+ expected++;
+ }
+ }
+
+ docID = docs.Advance(targetDocID);
+ if (VERBOSE)
+ {
+ Console.WriteLine(" got docID=" + docID);
+ }
+ Assert.AreEqual(expected, docID);
+ if (docID == DocIdSetIterator.NO_MORE_DOCS)
+ {
+ break;
+ }
+
+ if (Random().Next(6) == 3 && postings != null)
+ {
+ int freq = postings.Freq;
+ Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
+ }
+ }
+ }
+ }
+ r.Dispose();
+ dir.Dispose();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestMaxTermFrequency.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestMaxTermFrequency.cs b/src/Lucene.Net.Tests/Index/TestMaxTermFrequency.cs
new file mode 100644
index 0000000..cbbdc72
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestMaxTermFrequency.cs
@@ -0,0 +1,171 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using Lucene.Net.Support;
+ using NUnit.Framework;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using Field = Field;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+ using TFIDFSimilarity = Lucene.Net.Search.Similarities.TFIDFSimilarity;
+
+ /// <summary>
+ /// Tests the maxTermFrequency statistic in FieldInvertState
+ /// </summary>
+ [TestFixture]
+ public class TestMaxTermFrequency : LuceneTestCase
+ {
+ internal Directory Dir;
+ internal IndexReader Reader;
+ /* expected maxTermFrequency values for our documents */
+ internal List<int?> Expected = new List<int?>();
+
+ [SetUp]
+ public override void SetUp()
+ {
+ base.SetUp();
+ Dir = NewDirectory();
+ IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetMergePolicy(NewLogMergePolicy());
+ config.SetSimilarity(new TestSimilarity(this));
+ RandomIndexWriter writer = new RandomIndexWriter(Random(), Dir, config);
+ Document doc = new Document();
+ Field foo = NewTextField("foo", "", Field.Store.NO);
+ doc.Add(foo);
+ for (int i = 0; i < 100; i++)
+ {
+ foo.SetStringValue(AddValue());
+ writer.AddDocument(doc);
+ }
+ Reader = writer.Reader;
+ writer.Dispose();
+ }
+
+ [TearDown]
+ public override void TearDown()
+ {
+ Reader.Dispose();
+ Dir.Dispose();
+ base.TearDown();
+ }
+
+ [Test]
+ public virtual void Test()
+ {
+ NumericDocValues fooNorms = MultiDocValues.GetNormValues(Reader, "foo");
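+ // TestSimilarity (below) encodes maxTermFrequency directly into the norm
+ // byte; mask with 0xff to read the sbyte-encoded norm back as unsigned.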
+ for (int i = 0; i < Reader.MaxDoc; i++)
+ {
+ Assert.AreEqual((int)Expected[i], fooNorms.Get(i) & 0xff);
+ }
+ }
+
+ /// <summary>
+ /// Makes a bunch of single-char tokens (the max freq will be at most 255),
+ /// shuffles them around, and returns the whole list with Arrays.ToString().
+ /// This works fine because we use a letter-based tokenizer.
+ /// Puts the maximum frequency into Expected, to be checked against the norm.
+ /// </summary>
+ private string AddValue()
+ {
+ IList<string> terms = new List<string>();
+ int maxCeiling = TestUtil.NextInt(Random(), 0, 255);
+ int max = 0;
+ for (char ch = 'a'; ch <= 'z'; ch++)
+ {
+ int num = TestUtil.NextInt(Random(), 0, maxCeiling);
+ for (int i = 0; i < num; i++)
+ {
+ terms.Add(char.ToString(ch));
+ }
+ max = Math.Max(max, num);
+ }
+ Expected.Add(max);
+
+ Collections.Shuffle(terms);
+ return Arrays.ToString(terms.ToArray());
+ }
+
+ /// <summary>
+ /// Simple similarity that encodes maxTermFrequency directly as a byte
+ /// </summary>
+ internal class TestSimilarity : TFIDFSimilarity
+ {
+ private readonly TestMaxTermFrequency OuterInstance;
+
+ public TestSimilarity(TestMaxTermFrequency outerInstance)
+ {
+ this.OuterInstance = outerInstance;
+ }
+
+ public override float LengthNorm(FieldInvertState state)
+ {
+ return state.MaxTermFrequency;
+ }
+
+ public override long EncodeNormValue(float f)
+ {
+ return (sbyte)f;
+ }
+
+ public override float DecodeNormValue(long norm)
+ {
+ return norm;
+ }
+
+ public override float Coord(int overlap, int maxOverlap)
+ {
+ return 0;
+ }
+
+ public override float QueryNorm(float sumOfSquaredWeights)
+ {
+ return 0;
+ }
+
+ public override float Tf(float freq)
+ {
+ return 0;
+ }
+
+ public override float Idf(long docFreq, long numDocs)
+ {
+ return 0;
+ }
+
+ public override float SloppyFreq(int distance)
+ {
+ return 0;
+ }
+
+ public override float ScorePayload(int doc, int start, int end, BytesRef payload)
+ {
+ return 0;
+ }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestMixedCodecs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestMixedCodecs.cs b/src/Lucene.Net.Tests/Index/TestMixedCodecs.cs
new file mode 100644
index 0000000..56ba215
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestMixedCodecs.cs
@@ -0,0 +1,107 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using Lucene.Net.Randomized.Generators;
+ using NUnit.Framework;
+ using Codec = Lucene.Net.Codecs.Codec;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using Field = Field;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ [SuppressCodecs("Lucene3x")]
+ [TestFixture]
+ public class TestMixedCodecs : LuceneTestCase
+ {
+ [Test]
+ public virtual void Test()
+ {
+ int NUM_DOCS = AtLeast(1000);
+
+ Directory dir = NewDirectory();
+ RandomIndexWriter w = null;
+
+ int docsLeftInThisSegment = 0;
+
+ int docUpto = 0;
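+ // Index in runs of 10-100 docs, re-creating the writer (and sometimes
+ // switching to the Lucene46 codec) between runs so consecutive segments
+ // are written by mixed codecs.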
+ while (docUpto < NUM_DOCS)
+ {
+ if (VERBOSE)
+ {
+ Console.WriteLine("TEST: " + docUpto + " of " + NUM_DOCS);
+ }
+ if (docsLeftInThisSegment == 0)
+ {
+ IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+ if (Random().NextBoolean())
+ {
+ // Make sure we aggressively mix in a second codec;
+ // SimpleText has different impls for all codec formats
+ // (LUCENENET: Lucene46 is substituted for SimpleText here)...
+ iwc.SetCodec(Codec.ForName("Lucene46"));
+ }
+ if (w != null)
+ {
+ w.Dispose();
+ }
+ w = new RandomIndexWriter(Random(), dir, iwc);
+ docsLeftInThisSegment = TestUtil.NextInt(Random(), 10, 100);
+ }
+ Document doc = new Document();
+ doc.Add(NewStringField("id", Convert.ToString(docUpto), Field.Store.YES));
+ w.AddDocument(doc);
+ docUpto++;
+ docsLeftInThisSegment--;
+ }
+
+ if (VERBOSE)
+ {
+ Console.WriteLine("\nTEST: now delete...");
+ }
+
+ // Randomly delete half the docs:
+ HashSet<int?> deleted = new HashSet<int?>();
+ while (deleted.Count < NUM_DOCS / 2)
+ {
+ int? toDelete = Random().Next(NUM_DOCS);
+ if (!deleted.Contains(toDelete))
+ {
+ deleted.Add(toDelete);
+ w.DeleteDocuments(new Term("id", Convert.ToString(toDelete)));
+ if (Random().Next(17) == 6)
+ {
+ IndexReader r = w.Reader;
+ Assert.AreEqual(NUM_DOCS - deleted.Count, r.NumDocs);
+ r.Dispose();
+ }
+ }
+ }
+
+ w.Dispose();
+ dir.Dispose();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestMixedDocValuesUpdates.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestMixedDocValuesUpdates.cs b/src/Lucene.Net.Tests/Index/TestMixedDocValuesUpdates.cs
new file mode 100644
index 0000000..4a3741d
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestMixedDocValuesUpdates.cs
@@ -0,0 +1,576 @@
+using System;
+using System.Threading;
+using System.Collections.Generic;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+ using Lucene.Net.Randomized.Generators;
+ using Lucene.Net.Support;
+ using NUnit.Framework;
+ using System.IO;
+ using BinaryDocValuesField = BinaryDocValuesField;
+ using IBits = Lucene.Net.Util.IBits;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using IOUtils = Lucene.Net.Util.IOUtils;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+ using NumericDocValuesField = NumericDocValuesField;
+ using Store = Field.Store;
+ using StringField = StringField;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+ using Attributes;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ [SuppressCodecs("Appending", "Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45")]
+ [TestFixture]
+ public class TestMixedDocValuesUpdates : LuceneTestCase
+ {
+ [Test]
+ public virtual void TestManyReopensAndFields()
+ {
+ Directory dir = NewDirectory();
+ Random random = Random();
+ IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+ LogMergePolicy lmp = NewLogMergePolicy();
+ lmp.MergeFactor = 3; // merge often
+ conf.SetMergePolicy(lmp);
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ bool isNRT = random.NextBoolean();
+ DirectoryReader reader;
+ if (isNRT)
+ {
+ reader = DirectoryReader.Open(writer, true);
+ }
+ else
+ {
+ writer.Commit();
+ reader = DirectoryReader.Open(dir);
+ }
+
+ int numFields = random.Next(4) + 3; // 3-6
+ int numNDVFields = random.Next(numFields / 2) + 1; // 1-3
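+ // The first numNDVFields fields carry numeric doc values; the rest binary.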
+ long[] fieldValues = new long[numFields];
+ bool[] fieldHasValue = new bool[numFields];
+ Arrays.Fill(fieldHasValue, true);
+ for (int i = 0; i < fieldValues.Length; i++)
+ {
+ fieldValues[i] = 1;
+ }
+
+ int numRounds = AtLeast(15);
+ int docID = 0;
+ for (int i = 0; i < numRounds; i++)
+ {
+ int numDocs = AtLeast(5);
+ // System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
+ for (int j = 0; j < numDocs; j++)
+ {
+ Document doc = new Document();
+ doc.Add(new StringField("id", "doc-" + docID, Store.NO));
+ doc.Add(new StringField("key", "all", Store.NO)); // update key
+ // add all fields with their current value
+ for (int f = 0; f < fieldValues.Length; f++)
+ {
+ if (f < numNDVFields)
+ {
+ doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
+ }
+ else
+ {
+ doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
+ }
+ }
+ writer.AddDocument(doc);
+ ++docID;
+ }
+
+ // if the field's value was unset before, unset it from all newly added documents too
+ for (int field = 0; field < fieldHasValue.Length; field++)
+ {
+ if (!fieldHasValue[field])
+ {
+ if (field < numNDVFields)
+ {
+ writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
+ }
+ else
+ {
+ writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
+ }
+ }
+ }
+
+ int fieldIdx = random.Next(fieldValues.Length);
+ string updateField = "f" + fieldIdx;
+ if (random.NextBoolean())
+ {
+ // System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
+ fieldHasValue[fieldIdx] = false;
+ if (fieldIdx < numNDVFields)
+ {
+ writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
+ }
+ else
+ {
+ writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
+ }
+ }
+ else
+ {
+ fieldHasValue[fieldIdx] = true;
+ if (fieldIdx < numNDVFields)
+ {
+ writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
+ }
+ else
+ {
+ writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
+ }
+ // System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
+ }
+
+ if (random.NextDouble() < 0.2)
+ {
+ int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
+ writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
+ // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
+ }
+
+ // verify reader
+ if (!isNRT)
+ {
+ writer.Commit();
+ }
+
+ // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
+ DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
+ Assert.IsNotNull(newReader);
+ reader.Dispose();
+ reader = newReader;
+ // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
+ Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
+ BytesRef scratch = new BytesRef();
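+ // Check every live doc in every segment: fields that currently have a
+ // value must match fieldValues; unset fields must be absent from
+ // docsWithField.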
+ foreach (AtomicReaderContext context in reader.Leaves)
+ {
+ AtomicReader r = context.AtomicReader;
+ // System.out.println(((SegmentReader) r).getSegmentName());
+ IBits liveDocs = r.LiveDocs;
+ for (int field = 0; field < fieldValues.Length; field++)
+ {
+ string f = "f" + field;
+ BinaryDocValues bdv = r.GetBinaryDocValues(f);
+ NumericDocValues ndv = r.GetNumericDocValues(f);
+ IBits docsWithField = r.GetDocsWithField(f);
+ if (field < numNDVFields)
+ {
+ Assert.IsNotNull(ndv);
+ Assert.IsNull(bdv);
+ }
+ else
+ {
+ Assert.IsNull(ndv);
+ Assert.IsNotNull(bdv);
+ }
+ int maxDoc = r.MaxDoc;
+ for (int doc = 0; doc < maxDoc; doc++)
+ {
+ if (liveDocs == null || liveDocs.Get(doc))
+ {
+ // System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
+ if (fieldHasValue[field])
+ {
+ Assert.IsTrue(docsWithField.Get(doc));
+ if (field < numNDVFields)
+ {
+ Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
+ }
+ else
+ {
+ Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
+ }
+ }
+ else
+ {
+ Assert.IsFalse(docsWithField.Get(doc));
+ }
+ }
+ }
+ }
+ }
+ // System.out.println();
+ }
+
+ IOUtils.Close(writer, reader, dir);
+ }
+
+ [Test]
+ public virtual void TestStressMultiThreading()
+ {
+ Directory dir = NewDirectory();
+ IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ // create index
+ int numThreads = TestUtil.NextInt(Random(), 3, 6);
+ int numDocs = AtLeast(2000);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ doc.Add(new StringField("id", "doc" + i, Store.NO));
+ double group = Random().NextDouble();
+ string g;
+ if (group < 0.1)
+ {
+ g = "g0";
+ }
+ else if (group < 0.5)
+ {
+ g = "g1";
+ }
+ else if (group < 0.8)
+ {
+ g = "g2";
+ }
+ else
+ {
+ g = "g3";
+ }
+ doc.Add(new StringField("updKey", g, Store.NO));
+ for (int j = 0; j < numThreads; j++)
+ {
+ long value = Random().Next();
+ doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
+ doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
+ }
+ writer.AddDocument(doc);
+ }
+
+ CountdownEvent done = new CountdownEvent(numThreads);
+ AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100));
+
+ // each thread both updates its own field and reopens the NRT reader
+ ThreadClass[] threads = new ThreadClass[numThreads];
+ for (int i = 0; i < threads.Length; i++)
+ {
+ string f = "f" + i;
+ string cf = "cf" + i;
+ threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
+ }
+
+ foreach (ThreadClass t in threads)
+ {
+ t.Start();
+ }
+ done.Wait();
+ writer.Dispose();
+
+ DirectoryReader reader = DirectoryReader.Open(dir);
+ BytesRef scratch = new BytesRef();
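+ // Verify the invariant maintained by the update threads: each control
+ // field "cf" + i must equal twice the value decoded from its pair "f" + i.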
+ foreach (AtomicReaderContext context in reader.Leaves)
+ {
+ AtomicReader r = context.AtomicReader;
+ for (int i = 0; i < numThreads; i++)
+ {
+ BinaryDocValues bdv = r.GetBinaryDocValues("f" + i);
+ NumericDocValues control = r.GetNumericDocValues("cf" + i);
+ IBits docsWithBdv = r.GetDocsWithField("f" + i);
+ IBits docsWithControl = r.GetDocsWithField("cf" + i);
+ IBits liveDocs = r.LiveDocs;
+ for (int j = 0; j < r.MaxDoc; j++)
+ {
+ if (liveDocs == null || liveDocs.Get(j))
+ {
+ Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
+ if (docsWithBdv.Get(j))
+ {
+ long ctrlValue = control.Get(j);
+ long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
+ // if (ctrlValue != bdvValue) {
+ // System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
+ // }
+ Assert.AreEqual(ctrlValue, bdvValue);
+ }
+ }
+ }
+ }
+ }
+ reader.Dispose();
+
+ dir.Dispose();
+ }
+
+ private class ThreadAnonymousInnerClassHelper : ThreadClass
+ {
+ private readonly TestMixedDocValuesUpdates OuterInstance;
+
+ private IndexWriter Writer;
+ private int NumDocs;
+ private CountdownEvent Done;
+ private AtomicInt32 NumUpdates;
+ private string f;
+ private string Cf;
+
+ public ThreadAnonymousInnerClassHelper(TestMixedDocValuesUpdates outerInstance, string str, IndexWriter writer, int numDocs, CountdownEvent done, AtomicInt32 numUpdates, string f, string cf)
+ : base(str)
+ {
+ this.OuterInstance = outerInstance;
+ this.Writer = writer;
+ this.NumDocs = numDocs;
+ this.Done = done;
+ this.NumUpdates = numUpdates;
+ this.f = f;
+ this.Cf = cf;
+ }
+
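+ // Worker loop: apply random paired updates (or unsets), occasionally
+ // delete a doc, commit, or reopen an NRT reader, until the shared
+ // update counter is drained.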
+ public override void Run()
+ {
+ DirectoryReader reader = null;
+ bool success = false;
+ try
+ {
+ Random random = Random();
+ while (NumUpdates.GetAndDecrement() > 0)
+ {
+ double group = random.NextDouble();
+ Term t;
+ if (group < 0.1)
+ {
+ t = new Term("updKey", "g0");
+ }
+ else if (group < 0.5)
+ {
+ t = new Term("updKey", "g1");
+ }
+ else if (group < 0.8)
+ {
+ t = new Term("updKey", "g2");
+ }
+ else
+ {
+ t = new Term("updKey", "g3");
+ }
+ // System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t);
+ if (random.NextBoolean()) // sometimes unset a value
+ {
+ // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=UNSET");
+ Writer.UpdateBinaryDocValue(t, f, null);
+ Writer.UpdateNumericDocValue(t, Cf, null);
+ }
+ else
+ {
+ long updValue = random.Next();
+ // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue);
+ Writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue));
+ Writer.UpdateNumericDocValue(t, Cf, updValue * 2);
+ }
+
+ if (random.NextDouble() < 0.2)
+ {
+ // delete a random document
+ int doc = random.Next(NumDocs);
+ // System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
+ Writer.DeleteDocuments(new Term("id", "doc" + doc));
+ }
+
+ if (random.NextDouble() < 0.05) // commit every 20 updates on average
+ {
+ // System.out.println("[" + Thread.currentThread().getName() + "] commit");
+ Writer.Commit();
+ }
+
+ if (random.NextDouble() < 0.1) // reopen NRT reader (apply updates), on average once every 10 updates
+ {
+ if (reader == null)
+ {
+ // System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
+ reader = DirectoryReader.Open(Writer, true);
+ }
+ else
+ {
+ // System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
+ DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader, Writer, true);
+ if (r2 != null)
+ {
+ reader.Dispose();
+ reader = r2;
+ }
+ }
+ }
+ }
+ // System.out.println("[" + Thread.currentThread().getName() + "] DONE");
+ success = true;
+ }
+ catch (IOException e)
+ {
+ throw new Exception(e.Message, e);
+ }
+ finally
+ {
+ if (reader != null)
+ {
+ try
+ {
+ reader.Dispose();
+ }
+ catch (IOException e)
+ {
+ if (success) // suppress this exception only if there was another exception
+ {
+ throw new Exception(e.Message, e);
+ }
+ }
+ }
+ Done.Signal();
+ }
+ }
+ }
+
+ [Test]
+ public virtual void TestUpdateDifferentDocsInDifferentGens()
+ {
+ // update same document multiple times across generations
+ Directory dir = NewDirectory();
+ IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
+ conf.SetMaxBufferedDocs(4);
+ IndexWriter writer = new IndexWriter(dir, conf);
+ int numDocs = AtLeast(10);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ doc.Add(new StringField("id", "doc" + i, Store.NO));
+ long value = Random().Next();
+ doc.Add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.ToBytes(value)));
+ doc.Add(new NumericDocValuesField("cf", value * 2));
+ writer.AddDocument(doc);
+ }
+
+ int numGens = AtLeast(5);
+ BytesRef scratch = new BytesRef();
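+ // Each generation applies one paired update (binary "f", numeric "cf")
+ // and then re-verifies cf == 2 * f across the whole NRT reader.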
+ for (int i = 0; i < numGens; i++)
+ {
+ int doc = Random().Next(numDocs);
+ Term t = new Term("id", "doc" + doc);
+ long value = Random().NextLong();
+ writer.UpdateBinaryDocValue(t, "f", TestBinaryDocValuesUpdates.ToBytes(value));
+ writer.UpdateNumericDocValue(t, "cf", value * 2);
+ DirectoryReader reader = DirectoryReader.Open(writer, true);
+ foreach (AtomicReaderContext context in reader.Leaves)
+ {
+ AtomicReader r = context.AtomicReader;
+ BinaryDocValues fbdv = r.GetBinaryDocValues("f");
+ NumericDocValues cfndv = r.GetNumericDocValues("cf");
+ for (int j = 0; j < r.MaxDoc; j++)
+ {
+ Assert.AreEqual(cfndv.Get(j), TestBinaryDocValuesUpdates.GetValue(fbdv, j, scratch) * 2);
+ }
+ }
+ reader.Dispose();
+ }
+ writer.Dispose();
+ dir.Dispose();
+ }
+
+#if !NETSTANDARD
+ // LUCENENET: There is no Timeout attribute in NUnit for .NET Core.
+ [Timeout(80000)]
+#endif
+ [Test, HasTimeout]
+ public virtual void TestTonsOfUpdates()
+ {
+ // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
+ Directory dir = NewDirectory();
+ Random random = Random();
+ IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+ conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
+ conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ // test data: lots of documents (a few tens of thousands) and lots of update terms (a few hundred)
+ int numDocs = AtLeast(20000);
+ int numBinaryFields = AtLeast(5);
+ int numTerms = TestUtil.NextInt(random, 10, 100); // terms should affect many docs
+ HashSet<string> updateTerms = new HashSet<string>();
+ while (updateTerms.Count < numTerms)
+ {
+ updateTerms.Add(TestUtil.RandomSimpleString(random));
+ }
+
+ // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);
+
+ // build a large index with many BDV fields and update terms
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ int numUpdateTerms = TestUtil.NextInt(random, 1, numTerms / 10);
+ for (int j = 0; j < numUpdateTerms; j++)
+ {
+ doc.Add(new StringField("upd", RandomInts.RandomFrom(random, updateTerms), Store.NO));
+ }
+ for (int j = 0; j < numBinaryFields; j++)
+ {
+ long val = random.Next();
+ doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
+ doc.Add(new NumericDocValuesField("cf" + j, val * 2));
+ }
+ writer.AddDocument(doc);
+ }
+
+ writer.Commit(); // commit so there's something to apply to
+
+ // set to flush every 2048 bytes (approximately every 12 updates), so we get
+ // many flushes during binary updates
+ writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
+ int numUpdates = AtLeast(100);
+ // System.out.println("numUpdates=" + numUpdates);
+ for (int i = 0; i < numUpdates; i++)
+ {
+ int field = random.Next(numBinaryFields);
+ Term updateTerm = new Term("upd", RandomInts.RandomFrom(random, updateTerms));
+ long value = random.Next();
+ writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
+ writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
+ }
+
+ writer.Dispose();
+
+ DirectoryReader reader = DirectoryReader.Open(dir);
+ BytesRef scratch = new BytesRef();
+ foreach (AtomicReaderContext context in reader.Leaves)
+ {
+ for (int i = 0; i < numBinaryFields; i++)
+ {
+ AtomicReader r = context.AtomicReader;
+ BinaryDocValues f = r.GetBinaryDocValues("f" + i);
+ NumericDocValues cf = r.GetNumericDocValues("cf" + i);
+ for (int j = 0; j < r.MaxDoc; j++)
+ {
+ Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
+ }
+ }
+ }
+ reader.Dispose();
+
+ dir.Dispose();
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs b/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs
new file mode 100644
index 0000000..a4a4b84
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs
@@ -0,0 +1,439 @@
+using Lucene.Net.Documents;
+using Lucene.Net.Randomized.Generators;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+ using NUnit.Framework;
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BinaryDocValuesField = BinaryDocValuesField;
+ using IBits = Lucene.Net.Util.IBits;
+ using BytesRef = Lucene.Net.Util.BytesRef;
+ using Directory = Lucene.Net.Store.Directory;
+ using Document = Documents.Document;
+ using Field = Field;
+ using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+ using NumericDocValuesField = NumericDocValuesField;
+ using SortedDocValuesField = SortedDocValuesField;
+ using SortedSetDocValuesField = SortedSetDocValuesField;
+ using TestUtil = Lucene.Net.Util.TestUtil;
+
+ /// <summary>
+ /// Tests MultiDocValues versus ordinary segment merging </summary>
+ [SuppressCodecs("Lucene3x")]
+ [TestFixture]
+ public class TestMultiDocValues : LuceneTestCase
+ {
+ [Test]
+ public virtual void TestNumerics()
+ {
+ Directory dir = NewDirectory();
+ Document doc = new Document();
+ Field field = new NumericDocValuesField("numbers", 0);
+ doc.Add(field);
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ field.SetInt64Value(Random().NextLong());
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
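+ // ir keeps the multi-segment view; ir2 is force-merged down to a single
+ // segment, so MultiDocValues can be checked against truly merged values.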
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers");
+ NumericDocValues single = merged.GetNumericDocValues("numbers");
+ for (int i = 0; i < numDocs; i++)
+ {
+ Assert.AreEqual(single.Get(i), multi.Get(i));
+ }
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestBinary()
+ {
+ Directory dir = NewDirectory();
+ Document doc = new Document();
+ BytesRef @ref = new BytesRef();
+ Field field = new BinaryDocValuesField("bytes", @ref);
+ doc.Add(field);
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes");
+ BinaryDocValues single = merged.GetBinaryDocValues("bytes");
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ for (int i = 0; i < numDocs; i++)
+ {
+ single.Get(i, expected);
+ multi.Get(i, actual);
+ Assert.AreEqual(expected, actual);
+ }
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestSorted()
+ {
+ Directory dir = NewDirectory();
+ Document doc = new Document();
+ BytesRef @ref = new BytesRef();
+ Field field = new SortedDocValuesField("bytes", @ref);
+ doc.Add(field);
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
+ if (DefaultCodecSupportsDocsWithField() && Random().Next(7) == 0)
+ {
+ iw.AddDocument(new Document());
+ }
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes");
+ SortedDocValues single = merged.GetSortedDocValues("bytes");
+ Assert.AreEqual(single.ValueCount, multi.ValueCount);
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ for (int i = 0; i < numDocs; i++)
+ {
+ // check ord
+ Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i));
+ // check value
+ single.Get(i, expected);
+ multi.Get(i, actual);
+ Assert.AreEqual(expected, actual);
+ }
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+
+ // Tries to make more dups than TestSorted.
+ [Test]
+ public virtual void TestSortedWithLotsOfDups()
+ {
+ Directory dir = NewDirectory();
+ Document doc = new Document();
+ BytesRef @ref = new BytesRef();
+ Field field = new SortedDocValuesField("bytes", @ref);
+ doc.Add(field);
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ @ref.CopyChars(TestUtil.RandomSimpleString(Random(), 2));
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes");
+ SortedDocValues single = merged.GetSortedDocValues("bytes");
+ Assert.AreEqual(single.ValueCount, multi.ValueCount);
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ for (int i = 0; i < numDocs; i++)
+ {
+ // check ord
+ Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i));
+ // check ord value
+ single.Get(i, expected);
+ multi.Get(i, actual);
+ Assert.AreEqual(expected, actual);
+ }
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestSortedSet()
+ {
+ AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
+ Directory dir = NewDirectory();
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ int numValues = Random().Next(5);
+ for (int j = 0; j < numValues; j++)
+ {
+ doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomUnicodeString(Random()))));
+ }
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes");
+ SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");
+ if (multi == null)
+ {
+ Assert.IsNull(single);
+ }
+ else
+ {
+ Assert.AreEqual(single.ValueCount, multi.ValueCount);
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ // check values
+ for (long i = 0; i < single.ValueCount; i++)
+ {
+ single.LookupOrd(i, expected);
+ multi.LookupOrd(i, actual);
+ Assert.AreEqual(expected, actual);
+ }
+ // check ord list
+ for (int i = 0; i < numDocs; i++)
+ {
+ single.SetDocument(i);
+ List<long> expectedList = new List<long>();
+ long ord;
+ while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ expectedList.Add(ord);
+ }
+
+ multi.SetDocument(i);
+ int upto = 0;
+ while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ Assert.AreEqual(expectedList[upto], ord);
+ upto++;
+ }
+ Assert.AreEqual(expectedList.Count, upto);
+ }
+ }
+
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+
+ // Tries to make more dups than TestSortedSet.
+ [Test]
+ public virtual void TestSortedSetWithDups()
+ {
+ AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
+ Directory dir = NewDirectory();
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ int numValues = Random().Next(5);
+ for (int j = 0; j < numValues; j++)
+ {
+ doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random(), 2))));
+ }
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes");
+ SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");
+ if (multi == null)
+ {
+ Assert.IsNull(single);
+ }
+ else
+ {
+ Assert.AreEqual(single.ValueCount, multi.ValueCount);
+ BytesRef actual = new BytesRef();
+ BytesRef expected = new BytesRef();
+ // check values
+ for (long i = 0; i < single.ValueCount; i++)
+ {
+ single.LookupOrd(i, expected);
+ multi.LookupOrd(i, actual);
+ Assert.AreEqual(expected, actual);
+ }
+ // check ord list
+ for (int i = 0; i < numDocs; i++)
+ {
+ single.SetDocument(i);
+ List<long?> expectedList = new List<long?>();
+ long ord;
+ while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ expectedList.Add(ord);
+ }
+
+ multi.SetDocument(i);
+ int upto = 0;
+ while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+ {
+ Assert.AreEqual((long)expectedList[upto], ord);
+ upto++;
+ }
+ Assert.AreEqual(expectedList.Count, upto);
+ }
+ }
+
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+
+ [Test]
+ public virtual void TestDocsWithField()
+ {
+ AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField());
+ Directory dir = NewDirectory();
+
+ IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
+ iwc.SetMergePolicy(NewLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
+
+ int numDocs = AtLeast(500);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Document doc = new Document();
+ if (Random().Next(4) >= 0)
+ {
+ doc.Add(new NumericDocValuesField("numbers", Random().NextLong()));
+ }
+ doc.Add(new NumericDocValuesField("numbersAlways", Random().NextLong()));
+ iw.AddDocument(doc);
+ if (Random().Next(17) == 0)
+ {
+ iw.Commit();
+ }
+ }
+ DirectoryReader ir = iw.Reader;
+ iw.ForceMerge(1);
+ DirectoryReader ir2 = iw.Reader;
+ AtomicReader merged = GetOnlySegmentReader(ir2);
+ iw.Dispose();
+
+ IBits multi = MultiDocValues.GetDocsWithField(ir, "numbers");
+ IBits single = merged.GetDocsWithField("numbers");
+ if (multi == null)
+ {
+ Assert.IsNull(single);
+ }
+ else
+ {
+ Assert.AreEqual(single.Length, multi.Length);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Assert.AreEqual(single.Get(i), multi.Get(i));
+ }
+ }
+
+ multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways");
+ single = merged.GetDocsWithField("numbersAlways");
+ Assert.AreEqual(single.Length, multi.Length);
+ for (int i = 0; i < numDocs; i++)
+ {
+ Assert.AreEqual(single.Get(i), multi.Get(i));
+ }
+ ir.Dispose();
+ ir2.Dispose();
+ dir.Dispose();
+ }
+ }
+}
\ No newline at end of file