You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by yo...@apache.org on 2006/12/11 03:38:31 UTC

svn commit: r485469 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/SegmentMerger.java

Author: yonik
Date: Sun Dec 10 18:38:29 2006
New Revision: 485469

URL: http://svn.apache.org/viewvc?view=rev&rev=485469
Log:
Performance improvement for SegmentMerger.mergeNorms: LUCENE-739

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?view=diff&rev=485469&r1=485468&r2=485469
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Dec 10 18:38:29 2006
@@ -305,6 +305,11 @@
      TermDocs for a MultiReader.  The old implementation could
      recurse up to the number of segments in the index. (Yonik Seeley)
 
+ 14. LUCENE-739: Improve segment merging performance by reusing
+     the norm array across different fields and doing bulk writes
+     of norms of segments with no deleted docs.
+     (Michael Busch via Yonik Seeley)
+
 Test Cases
   1. Added TestTermScorer.java (Grant Ingersoll)
   2. Added TestWindowsMMap.java (Benson Margulies via Mike McCandless)

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?view=diff&rev=485469&r1=485468&r2=485469
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Sun Dec 10 18:38:29 2006
@@ -407,6 +407,7 @@
   }
 
   private void mergeNorms() throws IOException {
+    byte[] normBuffer = null;
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed && !fi.omitNorms) {
@@ -415,11 +416,21 @@
           for (int j = 0; j < readers.size(); j++) {
             IndexReader reader = (IndexReader) readers.elementAt(j);
             int maxDoc = reader.maxDoc();
-            byte[] input = new byte[maxDoc];
-            reader.norms(fi.name, input, 0);
-            for (int k = 0; k < maxDoc; k++) {
-              if (!reader.isDeleted(k)) {
-                output.writeByte(input[k]);
+            if (normBuffer == null || normBuffer.length < maxDoc) {
+              // the buffer is too small for the current segment
+              normBuffer = new byte[maxDoc];
+            }
+            reader.norms(fi.name, normBuffer, 0);
+            if (!reader.hasDeletions()) {
+              //optimized case for segments without deleted docs
+              output.writeBytes(normBuffer, maxDoc);
+            } else {
+              // this segment has deleted docs, so we have to
+              // check for every doc if it is deleted or not
+              for (int k = 0; k < maxDoc; k++) {
+                if (!reader.isDeleted(k)) {
+                  output.writeByte(normBuffer[k]);
+                }
               }
             }
           }