You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/10/08 21:31:34 UTC

svn commit: r1395736 - in /lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext: SimpleTextStoredFieldsReader.java SimpleTextTermVectorsReader.java

Author: rmuir
Date: Mon Oct  8 19:31:33 2012
New Revision: 1395736

URL: http://svn.apache.org/viewvc?rev=1395736&view=rev
Log:
Use less RAM for SimpleText stored fields and vectors

Modified:
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java?rev=1395736&r1=1395735&r2=1395736&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java Mon Oct  8 19:31:33 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simplet
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
 
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.index.FieldInfo;
@@ -46,7 +45,7 @@ import static org.apache.lucene.codecs.s
  * @lucene.experimental
  */
 public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
-  private ArrayList<Long> offsets; /* docid -> offset in .fld file */
+  private long offsets[]; /* docid -> offset in .fld file */
   private IndexInput in;
   private BytesRef scratch = new BytesRef();
   private CharsRef scratchUTF16 = new CharsRef();
@@ -65,11 +64,11 @@ public class SimpleTextStoredFieldsReade
         } catch (Throwable t) {} // ensure we throw our original exception
       }
     }
-    readIndex();
+    readIndex(si.getDocCount());
   }
   
   // used by clone
-  SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos) {
+  SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
     this.offsets = offsets;
     this.in = in;
     this.fieldInfos = fieldInfos;
@@ -78,19 +77,22 @@ public class SimpleTextStoredFieldsReade
   // we don't actually write a .fdx-like index, instead we read the 
   // stored fields file in entirety up-front and save the offsets 
   // so we can seek to the documents later.
-  private void readIndex() throws IOException {
-    offsets = new ArrayList<Long>();
+  private void readIndex(int size) throws IOException {
+    offsets = new long[size];
+    int upto = 0;
     while (!scratch.equals(END)) {
       readLine();
       if (StringHelper.startsWith(scratch, DOC)) {
-        offsets.add(in.getFilePointer());
+        offsets[upto] = in.getFilePointer();
+        upto++;
       }
     }
+    assert upto == offsets.length;
   }
   
   @Override
   public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
-    in.seek(offsets.get(n));
+    in.seek(offsets[n]);
     readLine();
     assert StringHelper.startsWith(scratch, NUM);
     int numFields = parseIntAt(NUM.length);

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1395736&r1=1395735&r2=1395736&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Mon Oct  8 19:31:33 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simplet
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
@@ -54,7 +53,7 @@ import static org.apache.lucene.codecs.s
  * @lucene.experimental
  */
 public class SimpleTextTermVectorsReader extends TermVectorsReader {
-  private ArrayList<Long> offsets; /* docid -> offset in .vec file */
+  private long offsets[]; /* docid -> offset in .vec file */
   private IndexInput in;
   private BytesRef scratch = new BytesRef();
   private CharsRef scratchUTF16 = new CharsRef();
@@ -71,11 +70,11 @@ public class SimpleTextTermVectorsReader
         } catch (Throwable t) {} // ensure we throw our original exception
       }
     }
-    readIndex();
+    readIndex(si.getDocCount());
   }
   
   // used by clone
-  SimpleTextTermVectorsReader(ArrayList<Long> offsets, IndexInput in) {
+  SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
     this.offsets = offsets;
     this.in = in;
   }
@@ -83,26 +82,29 @@ public class SimpleTextTermVectorsReader
   // we don't actually write a .tvx-like index, instead we read the 
   // vectors file in entirety up-front and save the offsets 
   // so we can seek to the data later.
-  private void readIndex() throws IOException {
-    offsets = new ArrayList<Long>();
+  private void readIndex(int maxDoc) throws IOException {
+    offsets = new long[maxDoc];
+    int upto = 0;
     while (!scratch.equals(END)) {
       readLine();
       if (StringHelper.startsWith(scratch, DOC)) {
-        offsets.add(in.getFilePointer());
+        offsets[upto] = in.getFilePointer();
+        upto++;
       }
     }
+    assert upto == offsets.length;
   }
   
   @Override
   public Fields get(int doc) throws IOException {
     // TestTV tests for this in testBadParams... but is this
     // really guaranteed by the API?
-    if (doc < 0 || doc >= offsets.size()) {
+    if (doc < 0 || doc >= offsets.length) {
       throw new IllegalArgumentException("doc id out of range");
     }
 
     SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
-    in.seek(offsets.get(doc));
+    in.seek(offsets[doc]);
     readLine();
     assert StringHelper.startsWith(scratch, NUMFIELDS);
     int numFields = parseIntAt(NUMFIELDS.length);