You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/19 05:00:30 UTC
svn commit: r1411059 - in /lucene/dev/branches/lucene4547/lucene:
codecs/src/java/org/apache/lucene/codecs/simpletext/
core/src/java/org/apache/lucene/codecs/
core/src/java/org/apache/lucene/index/
core/src/java/org/apache/lucene/search/
Author: rmuir
Date: Mon Nov 19 04:00:29 2012
New Revision: 1411059
URL: http://svn.apache.org/viewvc?rev=1411059&view=rev
Log:
fieldcache=caching, codec=datastructure/encoding (TODO: fix sorted/numeric too)
Modified:
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Mon Nov 19 04:00:29 2012
@@ -29,9 +29,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.BinaryDocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesArraySource;
import org.apache.lucene.codecs.NumericDocValuesConsumer;
-import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.codecs.SimpleDVConsumer;
import org.apache.lucene.codecs.SimpleDVProducer;
import org.apache.lucene.codecs.SimpleDocValuesFormat;
@@ -54,7 +52,6 @@ import org.apache.lucene.store.IndexOutp
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.packed.PackedInts;
/**
@@ -70,6 +67,7 @@ public class SimpleTextSimpleDocValuesFo
final static BytesRef MINVALUE = new BytesRef(" minvalue ");
final static BytesRef PATTERN = new BytesRef(" pattern ");
// used for bytes
+ final static BytesRef FIXEDLENGTH = new BytesRef(" fixedlength ");
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
final static BytesRef LENGTH = new BytesRef("length ");
// used for sorted bytes
@@ -103,6 +101,7 @@ public class SimpleTextSimpleDocValuesFo
* for bytes this is also a "fixed-width" file, for example:
* <pre>
* field myField
+ * fixedlength false
* maxlength 8
* pattern 0
* length 6
@@ -142,8 +141,7 @@ public class SimpleTextSimpleDocValuesFo
static class SimpleTextDocValuesWriter extends SimpleDVConsumer {
final IndexOutput data;
final BytesRef scratch = new BytesRef();
-
- final int numDocs; // for asserting
+ final int numDocs;
private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
SimpleTextDocValuesWriter(Directory dir, SegmentInfo si, IOContext context) throws IOException {
@@ -215,6 +213,10 @@ public class SimpleTextSimpleDocValuesFo
public BinaryDocValuesConsumer addBinaryField(FieldInfo field, boolean fixedLength, final int maxLength) throws IOException {
assert fieldSeen(field.name);
writeFieldEntry(field);
+ // write fixedlength
+ SimpleTextUtil.write(data, FIXEDLENGTH);
+ SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
+ SimpleTextUtil.writeNewline(data);
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
@@ -377,6 +379,7 @@ public class SimpleTextSimpleDocValuesFo
String pattern;
String ordPattern;
int maxLength;
+ boolean fixedLength;
long minValue;
int numValues;
};
@@ -420,6 +423,9 @@ public class SimpleTextSimpleDocValuesFo
data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
} else if (DocValues.isBytes(dvType)) {
readLine();
+ assert startsWith(FIXEDLENGTH);
+ field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
+ readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
readLine();
@@ -529,6 +535,21 @@ public class SimpleTextSimpleDocValuesFo
throw new RuntimeException(ioe);
}
}
+
+ @Override
+ public int size() {
+ return maxDoc;
+ }
+
+ @Override
+ public boolean isFixedLength() {
+ return field.fixedLength;
+ }
+
+ @Override
+ public int maxLength() {
+ return field.maxLength;
+ }
};
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java Mon Nov 19 04:00:29 2012
@@ -21,8 +21,6 @@ import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.DocValues.Source;
-import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -40,7 +38,7 @@ public abstract class BinaryDocValuesCon
BinaryDocValues source = reader.getBinaryDocValues(mergeState.fieldInfo.name);
if (source == null) {
- source = BinaryDocValues.DEFAULT;
+ source = new BinaryDocValues.EMPTY(maxDoc);
}
for (int i = 0; i < maxDoc; i++) {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java Mon Nov 19 04:00:29 2012
@@ -104,6 +104,7 @@ public abstract class SimpleDVConsumer i
// dead simple impl: codec can optimize
protected void mergeBinaryField(MergeState mergeState) throws IOException {
// first compute fixedLength and maxLength of live ones to be merged.
+ // nocommit: messy, and can be simplified by using docValues.maxLength/fixedLength in many cases.
boolean fixedLength = true;
int maxLength = -1;
BytesRef bytes = new BytesRef();
@@ -112,7 +113,7 @@ public abstract class SimpleDVConsumer i
final Bits liveDocs = reader.getLiveDocs();
BinaryDocValues docValues = reader.getBinaryDocValues(mergeState.fieldInfo.name);
if (docValues == null) {
- docValues = BinaryDocValues.DEFAULT;
+ docValues = new BinaryDocValues.EMPTY(maxDoc);
}
for (int i = 0; i < maxDoc; i++) {
if (liveDocs == null || liveDocs.get(i)) {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java Mon Nov 19 04:00:29 2012
@@ -23,11 +23,77 @@ import org.apache.lucene.util.BytesRef;
public abstract class BinaryDocValues {
// nocommit throws IOE or not?
public abstract void get(int docID, BytesRef result);
+
+ public abstract int size();
+
+ public abstract boolean isFixedLength();
+ public abstract int maxLength();
+
+ public BinaryDocValues newRAMInstance() {
+ // TODO: optimize this default impl with e.g. isFixedLength/maxLength and so on
+ // nocommit used packed ints/pagedbytes and so on
+ final int maxDoc = size();
+ final int maxLength = maxLength();
+ final boolean fixedLength = isFixedLength();
+ final byte[][] values = new byte[maxDoc][];
+ BytesRef scratch = new BytesRef();
+ for(int docID=0;docID<maxDoc;docID++) {
+ get(docID, scratch);
+ values[docID] = new byte[scratch.length];
+ System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
+ }
+
+ return new BinaryDocValues() {
- public static final BinaryDocValues DEFAULT = new BinaryDocValues() {
@Override
- public void get(int docID, BytesRef ret) {
- ret.length = 0;
+ public void get(int docID, BytesRef result) {
+ result.bytes = values[docID];
+ result.offset = 0;
+ result.length = result.bytes.length;
+ }
+
+ @Override
+ public int size() {
+ return maxDoc;
+ }
+
+ @Override
+ public boolean isFixedLength() {
+ return fixedLength;
+ }
+
+ @Override
+ public int maxLength() {
+ return maxLength;
}
};
+ }
+
+ public static class EMPTY extends BinaryDocValues {
+ private final int size;
+
+ public EMPTY(int size) {
+ this.size = size;
+ }
+
+ @Override
+ public void get(int docID, BytesRef result) {
+ result.length = 0;
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public boolean isFixedLength() {
+ return true;
+ }
+
+ @Override
+ public int maxLength() {
+ return 0;
+ }
+ };
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Nov 19 04:00:29 2012
@@ -1334,22 +1334,15 @@ class FieldCacheImpl implements FieldCac
protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
throws IOException {
- final int maxDoc = reader.maxDoc();
BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field);
if (valuesIn != null) {
- // nocommit used packed ints like below!
- final byte[][] values = new byte[maxDoc][];
- BytesRef scratch = new BytesRef();
- for(int docID=0;docID<maxDoc;docID++) {
- valuesIn.get(docID, scratch);
- values[docID] = new byte[scratch.length];
- System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
- }
-
+ final BinaryDocValues ramInstance = valuesIn.newRAMInstance();
return new DocTerms() {
+
@Override
- public int size() {
- return maxDoc;
+ public BytesRef getTerm(int docID, BytesRef ret) {
+ ramInstance.get(docID, ret);
+ return ret;
}
@Override
@@ -1359,15 +1352,12 @@ class FieldCacheImpl implements FieldCac
}
@Override
- public BytesRef getTerm(int docID, BytesRef ret) {
- ret.bytes = values[docID];
- ret.length = ret.bytes.length;
- ret.offset = 0;
- return ret;
- }
+ public int size() {
+ return ramInstance.size();
+ }
};
} else {
-
+ final int maxDoc = reader.maxDoc();
Terms terms = reader.terms(key.field);
final float acceptableOverheadRatio = ((Float) key.custom).floatValue();