You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/09/11 03:19:41 UTC
svn commit: r1624179 - in /lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index: TestIndexWriter.java TestManyFields.java

Author: rmuir
Date: Thu Sep 11 01:19:41 2014
New Revision: 1624179

URL: http://svn.apache.org/r1624179
Log:
pull field spammers out of TestIndexWriter, stop the OOMs in testThreadInterruptDeadlock

Added:
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestManyFields.java   (with props)
Modified:
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1624179&r1=1624178&r2=1624179&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Sep 11 01:19:41 2014
@@ -277,37 +277,6 @@ public class TestIndexWriter extends Luc
       dir.close();
     }
 
-    public void testManyFields() throws IOException {
-      Directory dir = newDirectory();
-      IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
-                                                   .setMaxBufferedDocs(10));
-      for(int j=0;j<100;j++) {
-        Document doc = new Document();
-        doc.add(newField("a"+j, "aaa" + j, storedTextType));
-        doc.add(newField("b"+j, "aaa" + j, storedTextType));
-        doc.add(newField("c"+j, "aaa" + j, storedTextType));
-        doc.add(newField("d"+j, "aaa", storedTextType));
-        doc.add(newField("e"+j, "aaa", storedTextType));
-        doc.add(newField("f"+j, "aaa", storedTextType));
-        writer.addDocument(doc);
-      }
-      writer.close();
-
-      IndexReader reader = DirectoryReader.open(dir);
-      assertEquals(100, reader.maxDoc());
-      assertEquals(100, reader.numDocs());
-      for(int j=0;j<100;j++) {
-        assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
-        assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
-        assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
-        assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
-        assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
-        assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
-      }
-      reader.close();
-      dir.close();
-    }
-
     public void testSmallRAMBuffer() throws IOException {
       Directory dir = newDirectory();
       IndexWriter writer  = new IndexWriter(
@@ -458,56 +427,6 @@ public class TestIndexWriter extends Luc
       dir.close();
     }
 
-    public void testDiverseDocs() throws IOException {
-      Directory dir = newDirectory();
-      IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
-                                                   .setRAMBufferSizeMB(0.5));
-      int n = atLeast(1);
-      for(int i=0;i<n;i++) {
-        // First, docs where every term is unique (heavy on
-        // Posting instances)
-        for(int j=0;j<100;j++) {
-          Document doc = new Document();
-          for(int k=0;k<100;k++) {
-            doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
-          }
-          writer.addDocument(doc);
-        }
-
-        // Next, many single term docs where only one term
-        // occurs (heavy on byte blocks)
-        for(int j=0;j<100;j++) {
-          Document doc = new Document();
-          doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
-          writer.addDocument(doc);
-        }
-
-        // Next, many single term docs where only one term
-        // occurs but the terms are very long (heavy on
-        // char[] arrays)
-        for(int j=0;j<100;j++) {
-          StringBuilder b = new StringBuilder();
-          String x = Integer.toString(j) + ".";
-          for(int k=0;k<1000;k++)
-            b.append(x);
-          String longTerm = b.toString();
-
-          Document doc = new Document();
-          doc.add(newField("field", longTerm, storedTextType));
-          writer.addDocument(doc);
-        }
-      }
-      writer.close();
-
-      IndexReader reader = DirectoryReader.open(dir);
-      IndexSearcher searcher = newSearcher(reader);
-      int totalHits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1).totalHits;
-      assertEquals(n*100, totalHits);
-      reader.close();
-
-      dir.close();
-    }
-
     public void testEnablingNorms() throws IOException {
       Directory dir = newDirectory();
       IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
@@ -1100,7 +1019,7 @@ public class TestIndexWriter extends Luc
             }
             IndexWriterConfig conf = newIndexWriterConfig(random,
                                                           Version.LATEST, new MockAnalyzer(random)).setMaxBufferedDocs(2);
-            conf.setInfoStream(log);
+            //conf.setInfoStream(log);
             w = new IndexWriter(dir, conf);
 
             Document doc = new Document();
@@ -1123,7 +1042,7 @@ public class TestIndexWriter extends Luc
               doc.add(sortedSetDVField);
             }
             for(int i=0;i<100;i++) {
-              log.println("\nTEST: i=" + i);
+              //log.println("\nTEST: i=" + i);
               idField.setStringValue(Integer.toString(i));
               if (defaultCodecSupportsDocValues()) {
                 binaryDVField.setBytesValue(new BytesRef(idField.stringValue()));
@@ -2148,51 +2067,7 @@ public class TestIndexWriter extends Luc
     dir.close();
   }
 
-  // LUCENE-4398
-  public void testRotatingFieldNames() throws Exception {
-    Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"));
-    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, new MockAnalyzer(random()));
-    iwc.setRAMBufferSizeMB(0.2);
-    iwc.setMaxBufferedDocs(-1);
-    IndexWriter w = new IndexWriter(dir, iwc);
-    int upto = 0;
-
-    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
-    ft.setOmitNorms(true);
-
-    int firstDocCount = -1;
-    for(int iter=0;iter<10;iter++) {
-      final int startFlushCount = w.getFlushCount();
-      int docCount = 0;
-      while(w.getFlushCount() == startFlushCount) {
-        Document doc = new Document();
-        for(int i=0;i<10;i++) {
-          doc.add(new Field("field" + (upto++), "content", ft));
-        }
-        w.addDocument(doc);
-        docCount++;
-      }
 
-      if (VERBOSE) {
-        System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
-      }
-
-      if (iter == 0) {
-        firstDocCount = docCount;
-      }
-
-      assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter, ((float) docCount) / firstDocCount > 0.9);
-
-      if (upto > 5000) {
-        // Start re-using field names after a while
-        // ... important because otherwise we can OOME due
-        // to too many FieldInfo instances.
-        upto = 0;
-      }
-    }
-    w.close();
-    dir.close();
-  }
   
   // LUCENE-4575
   public void testCommitWithUserDataOnly() throws Exception {

Added: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestManyFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestManyFields.java?rev=1624179&view=auto
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestManyFields.java (added)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestManyFields.java Thu Sep 11 01:19:41 2014
@@ -0,0 +1,163 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+
+/** Test that creates way, way, way too many fields */
+public class TestManyFields extends LuceneTestCase {
+  private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED); // NOTE: despite the name, this is copied from TextField.TYPE_NOT_STORED
+
+  public void testManyFields() throws IOException { // 100 docs, each with six field names unique to that doc; then verifies doc counts and per-term docFreq
+    Directory dir = newDirectory();
+    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
+                                                 .setMaxBufferedDocs(10)); // NOTE(review): presumably flushes every 10 buffered docs, so the 100 docs span several segments -- confirm
+    for(int j=0;j<100;j++) {
+      Document doc = new Document();
+      doc.add(newField("a"+j, "aaa" + j, storedTextType)); // fields a/b/c: value is suffixed with j as well as the name
+      doc.add(newField("b"+j, "aaa" + j, storedTextType));
+      doc.add(newField("c"+j, "aaa" + j, storedTextType));
+      doc.add(newField("d"+j, "aaa", storedTextType)); // fields d/e/f: same value in every doc, only the name varies
+      doc.add(newField("e"+j, "aaa", storedTextType));
+      doc.add(newField("f"+j, "aaa", storedTextType));
+      writer.addDocument(doc);
+    }
+    writer.close();
+
+    IndexReader reader = DirectoryReader.open(dir);
+    assertEquals(100, reader.maxDoc());
+    assertEquals(100, reader.numDocs());
+    for(int j=0;j<100;j++) { // every (field, term) pair was added to exactly one doc, so each docFreq must be 1
+      assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
+      assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
+      assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
+      assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
+      assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
+      assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
+    }
+    reader.close();
+    dir.close();
+  }
+  
+  public void testDiverseDocs() throws IOException { // indexes three contrasting document shapes per round into one field, then counts hits for "aaa"
+    Directory dir = newDirectory();
+    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
+                                                 .setRAMBufferSizeMB(0.5));
+    int n = atLeast(1); // number of outer rounds; each round adds 300 docs (3 groups of 100)
+    for(int i=0;i<n;i++) {
+      // First, docs where every term is unique (heavy on
+      // Posting instances)
+      for(int j=0;j<100;j++) {
+        Document doc = new Document();
+        for(int k=0;k<100;k++) {
+          doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
+        }
+        writer.addDocument(doc);
+      }
+
+      // Next, many single term docs where only one term
+      // occurs (heavy on byte blocks)
+      for(int j=0;j<100;j++) {
+        Document doc = new Document();
+        doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
+        writer.addDocument(doc);
+      }
+
+      // Next, many single term docs where only one term
+      // occurs but the terms are very long (heavy on
+      // char[] arrays)
+      for(int j=0;j<100;j++) {
+        StringBuilder b = new StringBuilder();
+        String x = Integer.toString(j) + ".";
+        for(int k=0;k<1000;k++) // repeat "<j>." 1000 times to build one long value
+          b.append(x);
+        String longTerm = b.toString();
+
+        Document doc = new Document();
+        doc.add(newField("field", longTerm, storedTextType));
+        writer.addDocument(doc);
+      }
+    }
+    writer.close();
+
+    IndexReader reader = DirectoryReader.open(dir);
+    IndexSearcher searcher = newSearcher(reader);
+    int totalHits = searcher.search(new TermQuery(new Term("field", "aaa")), null, 1).totalHits;
+    assertEquals(n*100, totalHits); // the assertion expects exactly the 100 "aaa ..." docs of each round to match
+    reader.close();
+
+    dir.close();
+  }
+
+  // LUCENE-4398 -- checks that later segments do not flush after markedly fewer docs than the first one did
+  public void testRotatingFieldNames() throws Exception {
+    Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields")); // NOTE(review): temp dir prefix still carries a TestIndexWriter name, not this class/method
+    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, new MockAnalyzer(random()));
+    iwc.setRAMBufferSizeMB(0.2);
+    iwc.setMaxBufferedDocs(-1); // NOTE(review): presumably disables doc-count flushing so only the 0.2 MB RAM buffer triggers flushes -- confirm
+    IndexWriter w = new IndexWriter(dir, iwc);
+    int upto = 0; // running suffix for generated field names; reset below once it passes 5000
+
+    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+    ft.setOmitNorms(true);
+
+    int firstDocCount = -1; // docs added before the first flush; baseline for the ratio check below
+    for(int iter=0;iter<10;iter++) {
+      final int startFlushCount = w.getFlushCount();
+      int docCount = 0;
+      while(w.getFlushCount() == startFlushCount) { // keep adding 10-field docs until the writer reports a new flush
+        Document doc = new Document();
+        for(int i=0;i<10;i++) {
+          doc.add(new Field("field" + (upto++), "content", ft));
+        }
+        w.addDocument(doc);
+        docCount++;
+      }
+
+      if (VERBOSE) {
+        System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
+      }
+
+      if (iter == 0) {
+        firstDocCount = docCount;
+      }
+
+      assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter, ((float) docCount) / firstDocCount > 0.9);
+
+      if (upto > 5000) {
+        // Start re-using field names after a while
+        // ... important because otherwise we can OOME due
+        // to too many FieldInfo instances.
+        upto = 0;
+      }
+    }
+    w.close();
+    dir.close();
+  }
+}