You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/10/08 21:31:34 UTC
svn commit: r1395736 - in
/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext:
SimpleTextStoredFieldsReader.java SimpleTextTermVectorsReader.java
Author: rmuir
Date: Mon Oct 8 19:31:33 2012
New Revision: 1395736
URL: http://svn.apache.org/viewvc?rev=1395736&view=rev
Log:
Use less RAM for SimpleText stored fields and term vectors.
Modified:
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java?rev=1395736&r1=1395735&r2=1395736&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java Mon Oct 8 19:31:33 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simplet
*/
import java.io.IOException;
-import java.util.ArrayList;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.FieldInfo;
@@ -46,7 +45,7 @@ import static org.apache.lucene.codecs.s
* @lucene.experimental
*/
public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
- private ArrayList<Long> offsets; /* docid -> offset in .fld file */
+ private long offsets[]; /* docid -> offset in .fld file */
private IndexInput in;
private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef();
@@ -65,11 +64,11 @@ public class SimpleTextStoredFieldsReade
} catch (Throwable t) {} // ensure we throw our original exception
}
}
- readIndex();
+ readIndex(si.getDocCount());
}
// used by clone
- SimpleTextStoredFieldsReader(ArrayList<Long> offsets, IndexInput in, FieldInfos fieldInfos) {
+ SimpleTextStoredFieldsReader(long offsets[], IndexInput in, FieldInfos fieldInfos) {
this.offsets = offsets;
this.in = in;
this.fieldInfos = fieldInfos;
@@ -78,19 +77,22 @@ public class SimpleTextStoredFieldsReade
// we don't actually write a .fdx-like index, instead we read the
// stored fields file in entirety up-front and save the offsets
// so we can seek to the documents later.
- private void readIndex() throws IOException {
- offsets = new ArrayList<Long>();
+ private void readIndex(int size) throws IOException {
+ offsets = new long[size];
+ int upto = 0;
while (!scratch.equals(END)) {
readLine();
if (StringHelper.startsWith(scratch, DOC)) {
- offsets.add(in.getFilePointer());
+ offsets[upto] = in.getFilePointer();
+ upto++;
}
}
+ assert upto == offsets.length;
}
@Override
public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException {
- in.seek(offsets.get(n));
+ in.seek(offsets[n]);
readLine();
assert StringHelper.startsWith(scratch, NUM);
int numFields = parseIntAt(NUM.length);
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1395736&r1=1395735&r2=1395736&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Mon Oct 8 19:31:33 2012
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simplet
*/
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@@ -54,7 +53,7 @@ import static org.apache.lucene.codecs.s
* @lucene.experimental
*/
public class SimpleTextTermVectorsReader extends TermVectorsReader {
- private ArrayList<Long> offsets; /* docid -> offset in .vec file */
+ private long offsets[]; /* docid -> offset in .vec file */
private IndexInput in;
private BytesRef scratch = new BytesRef();
private CharsRef scratchUTF16 = new CharsRef();
@@ -71,11 +70,11 @@ public class SimpleTextTermVectorsReader
} catch (Throwable t) {} // ensure we throw our original exception
}
}
- readIndex();
+ readIndex(si.getDocCount());
}
// used by clone
- SimpleTextTermVectorsReader(ArrayList<Long> offsets, IndexInput in) {
+ SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
this.offsets = offsets;
this.in = in;
}
@@ -83,26 +82,29 @@ public class SimpleTextTermVectorsReader
// we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
- private void readIndex() throws IOException {
- offsets = new ArrayList<Long>();
+ private void readIndex(int maxDoc) throws IOException {
+ offsets = new long[maxDoc];
+ int upto = 0;
while (!scratch.equals(END)) {
readLine();
if (StringHelper.startsWith(scratch, DOC)) {
- offsets.add(in.getFilePointer());
+ offsets[upto] = in.getFilePointer();
+ upto++;
}
}
+ assert upto == offsets.length;
}
@Override
public Fields get(int doc) throws IOException {
// TestTV tests for this in testBadParams... but is this
// really guaranteed by the API?
- if (doc < 0 || doc >= offsets.size()) {
+ if (doc < 0 || doc >= offsets.length) {
throw new IllegalArgumentException("doc id out of range");
}
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
- in.seek(offsets.get(doc));
+ in.seek(offsets[doc]);
readLine();
assert StringHelper.startsWith(scratch, NUMFIELDS);
int numFields = parseIntAt(NUMFIELDS.length);