You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/16 15:57:17 UTC
svn commit: r1232014 [1/2] - in /lucene/dev/trunk: lucene/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/contrib/misc/src/java/org/apache/lucene/misc/
lucene/contrib/misc/src/test/org/apache/lucene/misc/
lucene/src/java/org/apac...
Author: simonw
Date: Mon Jan 16 14:57:15 2012
New Revision: 1232014
URL: http://svn.apache.org/viewvc?rev=1232014&view=rev
Log:
LUCENE-3687: Allow similarity to encode norms other than a single byte
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java
- copied, changed from r1229530, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java
- copied, changed from r1229530, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Norm.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldInfosFormat.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldInfosReader.java
- copied, changed from r1229530, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldInfosWriter.java
- copied, changed from r1229530, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldsWriter.java
- copied, changed from r1231643, lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsConsumer.java
- copied, changed from r1231643, lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCustomNorms.java
Removed:
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexFieldsWriter.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/MIGRATE.txt
lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/DefaultSimilarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/Similarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/similarities/package.html
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsFormat.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWPostingsFormat.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestUniqueTermCount.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSimilarity.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSimilarityProvider.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Jan 16 14:57:15 2012
@@ -622,6 +622,11 @@ New features
* LUCENE-3628: Norms are represented as DocValues. IndexReader exposes
a #normValues(String) method to obtain norms per field. (Simon Willnauer)
+* LUCENE-3687: Similarity#computeNorm(FieldInvertState, Norm) allows computing
+ norm values of arbitrary precision. Instead of returning a fixed single byte
+ value, custom similarities can now set an integer, float or byte value on the
+ given Norm object. (Simon Willnauer)
+
Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Mon Jan 16 14:57:15 2012
@@ -550,3 +550,7 @@ you can now do this:
IndexReader.openIfChanged (a static method), and now returns null
(instead of the old reader) if there are no changes to the index, to
prevent the common pitfall of accidentally closing the old reader.
+
+* LUCENE-3687: Similarity#computeNorm() now expects a Norm object to set the computed
+ norm value instead of returning a fixed single byte value. Custom similarities can now
+ set integer, float and byte values if a single byte is not sufficient.
Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Jan 16 14:57:15 2012
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Norm;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
@@ -48,7 +49,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleByteSource;
+import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleValueSource;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -1157,8 +1158,9 @@ public class MemoryIndex {
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
- byte norm = fieldSim.computeNorm(invertState);
- SingleByteSource singleByteSource = new SingleByteSource(new byte[] {norm});
+ Norm norm = new Norm();
+ fieldSim.computeNorm(invertState, norm);
+ SingleValueSource singleByteSource = new SingleValueSource(norm);
norms = new MemoryIndexNormDocValues(singleByteSource);
// cache it for future reuse
cachedNormValues = norms;
Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java Mon Jan 16 14:57:15 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.index.memory;
*/
import java.io.IOException;
+import org.apache.lucene.index.Norm;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.BytesRef;
@@ -51,21 +52,56 @@ class MemoryIndexNormDocValues extends D
return 1;
}
- public static class SingleByteSource extends Source {
+ public static class SingleValueSource extends Source {
- private final byte[] bytes;
+ private final Number numericValue;
+ private final BytesRef binaryValue;
- protected SingleByteSource(byte[] bytes) {
- super(Type.BYTES_FIXED_STRAIGHT);
- this.bytes = bytes;
+ protected SingleValueSource(Norm norm) {
+ super(norm.type());
+ this.numericValue = norm.field().numericValue();
+ this.binaryValue = norm.field().binaryValue();
+ }
+
+ @Override
+ public long getInt(int docID) {
+ switch (type) {
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ assert numericValue != null;
+ return numericValue.longValue();
+ }
+ return super.getInt(docID);
+ }
+
+ @Override
+ public double getFloat(int docID) {
+ switch (type) {
+ case FLOAT_32:
+ case FLOAT_64:
+ assert numericValue != null;
+ return numericValue.floatValue();
+ }
+ return super.getFloat(docID);
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
- ref.bytes = bytes;
- ref.offset = docID;
- ref.length = 1;
- return ref;
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ assert binaryValue != null;
+ ref.copyBytes(binaryValue);
+ return ref;
+ }
+ return super.getBytes(docID, ref);
}
@Override
@@ -75,9 +111,33 @@ class MemoryIndexNormDocValues extends D
@Override
public Object getArray() {
- return bytes;
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ return binaryValue.bytes;
+ case FIXED_INTS_16:
+ return new short[] { numericValue.shortValue() };
+ case FIXED_INTS_32:
+ return new int[] { numericValue.intValue() };
+ case FIXED_INTS_64:
+ return new long[] { numericValue.longValue() };
+ case FIXED_INTS_8:
+ return new byte[] { numericValue.byteValue() };
+ case VAR_INTS:
+ return new long[] { numericValue.longValue() };
+ case FLOAT_32:
+ return new float[] { numericValue.floatValue() };
+ case FLOAT_64:
+ return new double[] { numericValue.doubleValue() };
+ default:
+ throw new IllegalArgumentException("unknown type " + type);
+ }
+
}
-
}
}
Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java Mon Jan 16 14:57:15 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.misc;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.Norm;
/**
* A similarity with a lengthNorm that provides for a "plateau" of
@@ -106,7 +107,7 @@ public class SweetSpotSimilarity extends
* discountOverlaps is true by default or true for this
* specific field. */
@Override
- public byte computeNorm(FieldInvertState state) {
+ public void computeNorm(FieldInvertState state, Norm norm) {
final int numTokens;
if (discountOverlaps)
@@ -114,7 +115,7 @@ public class SweetSpotSimilarity extends
else
numTokens = state.getLength();
- return encodeNormValue(state.getBoost() * computeLengthNorm(numTokens));
+ norm.setByte(encodeNormValue(state.getBoost() * computeLengthNorm(numTokens)));
}
/**
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/SweetSpotSimilarityTest.java Mon Jan 16 14:57:15 2012
@@ -24,12 +24,24 @@ import org.apache.lucene.search.similari
import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.index.Norm;
import org.apache.lucene.index.FieldInvertState;
+
/**
* Test of the SweetSpotSimilarity
*/
public class SweetSpotSimilarityTest extends LuceneTestCase {
+
+ public static float computeAndDecodeNorm(SweetSpotSimilarity decode, Similarity encode, FieldInvertState state) {
+ return decode.decodeNormValue(computeAndGetNorm(encode, state));
+ }
+
+ public static byte computeAndGetNorm(Similarity s, FieldInvertState state) {
+ Norm norm = new Norm();
+ s.computeNorm(state, norm);
+ return norm.field().numericValue().byteValue();
+ }
public void testSweetSpotComputeNorm() {
@@ -45,9 +57,13 @@ public class SweetSpotSimilarityTest ext
invertState.setBoost(1.0f);
for (int i = 1; i < 1000; i++) {
invertState.setLength(i);
+ Norm lNorm = new Norm();
+ Norm rNorm = new Norm();
+ d.computeNorm(invertState, lNorm);
+ s.computeNorm(invertState, rNorm);
assertEquals("base case: i="+i,
- d.computeNorm(invertState),
- s.computeNorm(invertState),
+ computeAndGetNorm(d, invertState),
+ computeAndGetNorm(s, invertState),
0.0f);
}
@@ -59,15 +75,15 @@ public class SweetSpotSimilarityTest ext
invertState.setLength(i);
assertEquals("3,10: spot i="+i,
1.0f,
- ss.decodeNormValue(s.computeNorm(invertState)),
+ computeAndDecodeNorm(ss, ss, invertState),
0.0f);
}
for (int i = 10; i < 1000; i++) {
invertState.setLength(i-9);
- final byte normD = d.computeNorm(invertState);
+ final byte normD = computeAndGetNorm(d, invertState);
invertState.setLength(i);
- final byte normS = s.computeNorm(invertState);
+ final byte normS = computeAndGetNorm(s, invertState);
assertEquals("3,10: 10<x : i="+i,
normD,
normS,
@@ -105,14 +121,14 @@ public class SweetSpotSimilarityTest ext
invertState.setLength(i);
assertEquals("f: 3,10: spot i="+i,
1.0f,
- ss.decodeNormValue(sp.get("foo").computeNorm(invertState)),
+ computeAndDecodeNorm(ss, sp.get("foo"), invertState),
0.0f);
}
for (int i = 10; i < 1000; i++) {
invertState.setLength(i-9);
- final byte normD = d.computeNorm(invertState);
+ final byte normD = computeAndGetNorm(d, invertState);
invertState.setLength(i);
- final byte normS = sp.get("foo").computeNorm(invertState);
+ final byte normS = computeAndGetNorm(sp.get("foo"), invertState);
assertEquals("f: 3,10: 10<x : i="+i,
normD,
normS,
@@ -122,21 +138,21 @@ public class SweetSpotSimilarityTest ext
invertState.setLength(i);
assertEquals("f: 8,13: spot i="+i,
1.0f,
- ss.decodeNormValue(sp.get("bar").computeNorm(invertState)),
+ computeAndDecodeNorm(ss, sp.get("bar"), invertState),
0.0f);
}
for (int i = 6; i <=9; i++) {
invertState.setLength(i);
assertEquals("f: 6,9: spot i="+i,
1.0f,
- ss.decodeNormValue(sp.get("yak").computeNorm(invertState)),
+ computeAndDecodeNorm(ss, sp.get("yak"), invertState),
0.0f);
}
for (int i = 13; i < 1000; i++) {
invertState.setLength(i-12);
- final byte normD = d.computeNorm(invertState);
+ final byte normD = computeAndGetNorm(d, invertState);
invertState.setLength(i);
- final byte normS = sp.get("bar").computeNorm(invertState);
+ final byte normS = computeAndGetNorm(sp.get("bar"), invertState);
assertEquals("f: 8,13: 13<x : i="+i,
normD,
normS,
@@ -144,9 +160,9 @@ public class SweetSpotSimilarityTest ext
}
for (int i = 9; i < 1000; i++) {
invertState.setLength(i-8);
- final byte normD = d.computeNorm(invertState);
+ final byte normD = computeAndGetNorm(d, invertState);
invertState.setLength(i);
- final byte normS = sp.get("yak").computeNorm(invertState);
+ final byte normS = computeAndGetNorm(sp.get("yak"), invertState);
assertEquals("f: 6,9: 9<x : i="+i,
normD,
normS,
@@ -158,8 +174,8 @@ public class SweetSpotSimilarityTest ext
for (int i = 9; i < 1000; i++) {
invertState.setLength(i);
- final byte normSS = sp.get("a").computeNorm(invertState);
- final byte normS = sp.get("b").computeNorm(invertState);
+ final byte normSS = computeAndGetNorm(sp.get("a"), invertState);
+ final byte normS = computeAndGetNorm(sp.get("b"), invertState);
assertTrue("s: i="+i+" : a="+normSS+
" < b="+normS,
normSS < normS);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java Mon Jan 16 14:57:15 2012
@@ -115,23 +115,13 @@ public abstract class DocValuesConsumer
final Field scratchField;
switch(type) {
case VAR_INTS:
- scratchField = new DocValuesField("", (long) 0, type);
- break;
case FIXED_INTS_16:
- scratchField = new DocValuesField("", (short) 0, type);
- break;
case FIXED_INTS_32:
- scratchField = new DocValuesField("", 0, type);
- break;
case FIXED_INTS_64:
- scratchField = new DocValuesField("", (long) 0, type);
- break;
case FIXED_INTS_8:
- scratchField = new DocValuesField("", (byte) 0, type);
+ scratchField = new DocValuesField("", (long) 0, type);
break;
case FLOAT_32:
- scratchField = new DocValuesField("", (float) 0, type);
- break;
case FLOAT_64:
scratchField = new DocValuesField("", (double) 0, type);
break;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java Mon Jan 16 14:57:15 2012
@@ -52,7 +52,10 @@ public abstract class PerDocConsumer imp
for (int i = 0; i < docValues.length; i++) {
docValues[i] = getDocValuesForMerge(mergeState.readers.get(i).reader, fieldInfo);
}
- final DocValuesConsumer docValuesConsumer = addValuesField(getDocValuesType(fieldInfo), fieldInfo);
+ Type docValuesType = getDocValuesType(fieldInfo);
+ assert docValuesType != null;
+
+ final DocValuesConsumer docValuesConsumer = addValuesField(docValuesType, fieldInfo);
assert docValuesConsumer != null;
docValuesConsumer.merge(mergeState, docValues);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java Mon Jan 16 14:57:15 2012
@@ -30,7 +30,6 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.codecs.SegmentInfosFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
@@ -56,7 +55,7 @@ public class Lucene3xCodec extends Codec
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
// TODO: this should really be a different impl
- private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene3xFieldInfosFormat();
// TODO: this should really be a different impl
// also if we want preflex to *really* be read-only it should throw exception for the writer?
Copied: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java (from r1229530, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java?p2=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java&r1=1229530&r2=1232014&rev=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java Mon Jan 16 14:57:15 2012
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.lucene40;
+package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -29,9 +29,8 @@ import org.apache.lucene.store.Directory
/**
* @lucene.experimental
*/
-public class Lucene40FieldInfosFormat extends FieldInfosFormat {
- private final FieldInfosReader reader = new Lucene40FieldInfosReader();
- private final FieldInfosWriter writer = new Lucene40FieldInfosWriter();
+public class Lucene3xFieldInfosFormat extends FieldInfosFormat {
+ private final FieldInfosReader reader = new Lucene3xFieldInfosReader();
@Override
public FieldInfosReader getFieldInfosReader() throws IOException {
@@ -40,11 +39,11 @@ public class Lucene40FieldInfosFormat ex
@Override
public FieldInfosWriter getFieldInfosWriter() throws IOException {
- return writer;
+ throw new IllegalArgumentException("this codec can only be used for reading");
}
@Override
public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- Lucene40FieldInfosReader.files(dir, info, files);
+ Lucene3xFieldInfosReader.files(dir, info, files);
}
}
Copied: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java (from r1229530, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java?p2=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java&r1=1229530&r2=1232014&rev=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java Mon Jan 16 14:57:15 2012
@@ -1,22 +1,4 @@
-package org.apache.lucene.codecs.lucene40;
-
-import java.io.IOException;
-import java.util.Set;
-
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFormatTooNewException;
-import org.apache.lucene.index.IndexFormatTooOldException;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-
+package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -33,17 +15,46 @@ import org.apache.lucene.store.IndexInpu
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.codecs.FieldInfosReader;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexFormatTooNewException;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
/**
* @lucene.experimental
*/
-public class Lucene40FieldInfosReader extends FieldInfosReader {
-
- static final int FORMAT_MINIMUM = Lucene40FieldInfosWriter.FORMAT_START;
+public class Lucene3xFieldInfosReader extends FieldInfosReader {
+ /** Extension of field infos */
+ static final String FIELD_INFOS_EXTENSION = "fnm";
+
+ // First used in 2.9; prior to 2.9 there was no format header
+ static final int FORMAT_START = -2;
+ // First used in 3.4: omit only positional information
+ static final int FORMAT_OMIT_POSITIONS = -3;
+ static final int FORMAT_MINIMUM = FORMAT_START;
+ static final int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS;
+ static final byte IS_INDEXED = 0x1;
+ static final byte STORE_TERMVECTOR = 0x2;
+ static final byte OMIT_NORMS = 0x10;
+ static final byte STORE_PAYLOADS = 0x20;
+ static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
+ static final byte OMIT_POSITIONS = -128;
@Override
public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
- final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene40FieldInfosWriter.FIELD_INFOS_EXTENSION);
+ final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
IndexInput input = directory.openInput(fileName, iocontext);
boolean hasVectors = false;
@@ -54,10 +65,10 @@ public class Lucene40FieldInfosReader ex
final int format = input.readVInt();
if (format > FORMAT_MINIMUM) {
- throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, Lucene40FieldInfosWriter.FORMAT_CURRENT);
+ throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
}
- if (format < Lucene40FieldInfosWriter.FORMAT_CURRENT) {
- throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, Lucene40FieldInfosWriter.FORMAT_CURRENT);
+ if (format < FORMAT_CURRENT) {
+ throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
}
final int size = input.readVInt(); //read in the size
@@ -65,17 +76,17 @@ public class Lucene40FieldInfosReader ex
for (int i = 0; i < size; i++) {
String name = input.readString();
- final int fieldNumber = format <= Lucene40FieldInfosWriter.FORMAT_FLEX? input.readInt():i;
+ final int fieldNumber = i;
byte bits = input.readByte();
- boolean isIndexed = (bits & Lucene40FieldInfosWriter.IS_INDEXED) != 0;
- boolean storeTermVector = (bits & Lucene40FieldInfosWriter.STORE_TERMVECTOR) != 0;
- boolean omitNorms = (bits & Lucene40FieldInfosWriter.OMIT_NORMS) != 0;
- boolean storePayloads = (bits & Lucene40FieldInfosWriter.STORE_PAYLOADS) != 0;
+ boolean isIndexed = (bits & IS_INDEXED) != 0;
+ boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
+ boolean omitNorms = (bits & OMIT_NORMS) != 0;
+ boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
- if ((bits & Lucene40FieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
+ if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
- } else if ((bits & Lucene40FieldInfosWriter.OMIT_POSITIONS) != 0) {
- if (format <= Lucene40FieldInfosWriter.FORMAT_OMIT_POSITIONS) {
+ } else if ((bits & OMIT_POSITIONS) != 0) {
+ if (format <= FORMAT_OMIT_POSITIONS) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else {
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
@@ -93,65 +104,13 @@ public class Lucene40FieldInfosReader ex
hasVectors |= storeTermVector;
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
- DocValues.Type docValuesType = null;
- if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX) {
- final byte b = input.readByte();
- switch(b) {
- case 0:
- docValuesType = null;
- break;
- case 1:
- docValuesType = DocValues.Type.VAR_INTS;
- break;
- case 2:
- docValuesType = DocValues.Type.FLOAT_32;
- break;
- case 3:
- docValuesType = DocValues.Type.FLOAT_64;
- break;
- case 4:
- docValuesType = DocValues.Type.BYTES_FIXED_STRAIGHT;
- break;
- case 5:
- docValuesType = DocValues.Type.BYTES_FIXED_DEREF;
- break;
- case 6:
- docValuesType = DocValues.Type.BYTES_VAR_STRAIGHT;
- break;
- case 7:
- docValuesType = DocValues.Type.BYTES_VAR_DEREF;
- break;
- case 8:
- docValuesType = DocValues.Type.FIXED_INTS_16;
- break;
- case 9:
- docValuesType = DocValues.Type.FIXED_INTS_32;
- break;
- case 10:
- docValuesType = DocValues.Type.FIXED_INTS_64;
- break;
- case 11:
- docValuesType = DocValues.Type.FIXED_INTS_8;
- break;
- case 12:
- docValuesType = DocValues.Type.BYTES_FIXED_SORTED;
- break;
- case 13:
- docValuesType = DocValues.Type.BYTES_VAR_SORTED;
- break;
-
- default:
- throw new IllegalStateException("unhandled indexValues type " + b);
- }
- }
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
- omitNorms, storePayloads, indexOptions, docValuesType);
+ omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.BYTES_VAR_STRAIGHT : null);
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
-
return new FieldInfos(infos, hasFreq, hasProx, hasVectors);
} finally {
input.close();
@@ -159,6 +118,6 @@ public class Lucene40FieldInfosReader ex
}
public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40FieldInfosWriter.FIELD_INFOS_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(info.name, "", FIELD_INFOS_EXTENSION));
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java Mon Jan 16 14:57:15 2012
@@ -75,7 +75,7 @@ class Lucene3xNormsProducer extends PerD
try {
long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now)
for (FieldInfo fi : fields) {
- if (fi.isIndexed && !fi.omitNorms) {
+ if (fi.normsPresent()) {
String fileName = getNormFilename(segmentName, normGen, fi.number);
Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir;
@@ -161,7 +161,7 @@ class Lucene3xNormsProducer extends PerD
static final class NormSource extends Source {
protected NormSource(byte[] bytes) {
- super(Type.BYTES_FIXED_STRAIGHT);
+ super(Type.FIXED_INTS_8);
this.bytes = bytes;
}
@@ -176,6 +176,11 @@ class Lucene3xNormsProducer extends PerD
}
@Override
+ public long getInt(int docID) {
+ // byte -> long widening sign-extends, so a stored norm byte with the high bit set is returned as a negative value
+ return bytes[docID];
+ }
+
+ @Override
public boolean hasArray() {
return true;
}
@@ -192,6 +197,7 @@ class Lucene3xNormsProducer extends PerD
// like first FI that has norms but doesn't have separate norms?
final String normsFileName = IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION);
if (dir.fileExists(normsFileName)) {
+ // only needed to do this in 3x - 4x can decide if the norms are present
files.add(normsFileName);
}
}
@@ -231,7 +237,7 @@ class Lucene3xNormsProducer extends PerD
@Override
public Type type() {
- return Type.BYTES_FIXED_STRAIGHT;
+ return Type.FIXED_INTS_8;
}
byte[] bytes() throws IOException {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java Mon Jan 16 14:57:15 2012
@@ -65,7 +65,7 @@ public class Lucene40FieldInfosReader ex
for (int i = 0; i < size; i++) {
String name = input.readString();
- final int fieldNumber = format <= Lucene40FieldInfosWriter.FORMAT_FLEX? input.readInt():i;
+ final int fieldNumber = input.readInt();
byte bits = input.readByte();
boolean isIndexed = (bits & Lucene40FieldInfosWriter.IS_INDEXED) != 0;
boolean storeTermVector = (bits & Lucene40FieldInfosWriter.STORE_TERMVECTOR) != 0;
@@ -75,12 +75,8 @@ public class Lucene40FieldInfosReader ex
if ((bits & Lucene40FieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & Lucene40FieldInfosWriter.OMIT_POSITIONS) != 0) {
- if (format <= Lucene40FieldInfosWriter.FORMAT_OMIT_POSITIONS) {
- indexOptions = IndexOptions.DOCS_AND_FREQS;
- } else {
- throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
- }
- } else if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX && (bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
+ indexOptions = IndexOptions.DOCS_AND_FREQS;
+ } else if ((bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
@@ -95,59 +91,12 @@ public class Lucene40FieldInfosReader ex
hasVectors |= storeTermVector;
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
- DocValues.Type docValuesType = null;
- if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX) {
- final byte b = input.readByte();
- switch(b) {
- case 0:
- docValuesType = null;
- break;
- case 1:
- docValuesType = DocValues.Type.VAR_INTS;
- break;
- case 2:
- docValuesType = DocValues.Type.FLOAT_32;
- break;
- case 3:
- docValuesType = DocValues.Type.FLOAT_64;
- break;
- case 4:
- docValuesType = DocValues.Type.BYTES_FIXED_STRAIGHT;
- break;
- case 5:
- docValuesType = DocValues.Type.BYTES_FIXED_DEREF;
- break;
- case 6:
- docValuesType = DocValues.Type.BYTES_VAR_STRAIGHT;
- break;
- case 7:
- docValuesType = DocValues.Type.BYTES_VAR_DEREF;
- break;
- case 8:
- docValuesType = DocValues.Type.FIXED_INTS_16;
- break;
- case 9:
- docValuesType = DocValues.Type.FIXED_INTS_32;
- break;
- case 10:
- docValuesType = DocValues.Type.FIXED_INTS_64;
- break;
- case 11:
- docValuesType = DocValues.Type.FIXED_INTS_8;
- break;
- case 12:
- docValuesType = DocValues.Type.BYTES_FIXED_SORTED;
- break;
- case 13:
- docValuesType = DocValues.Type.BYTES_VAR_SORTED;
- break;
-
- default:
- throw new IllegalStateException("unhandled indexValues type " + b);
- }
- }
+ // DV Types are packed in one byte
+ byte val = input.readByte();
+ final DocValues.Type docValuesType = getDocValuesType((byte) (val & 0x0F));
+ final DocValues.Type normsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
- omitNorms, storePayloads, indexOptions, docValuesType);
+ omitNorms, storePayloads, indexOptions, docValuesType, normsType);
}
if (input.getFilePointer() != input.length()) {
@@ -159,6 +108,42 @@ public class Lucene40FieldInfosReader ex
input.close();
}
}
+
+  /**
+   * Maps a one-byte type code to its {@link DocValues.Type}.
+   *
+   * @param b the type code; 0 stands for "no type", 1 to 13 select a type
+   * @return the type assigned to the code, or <code>null</code> for code 0
+   * @throws IllegalStateException if the code is not in the range 0 to 13
+   */
+  public DocValues.Type getDocValuesType(final byte b) {
+    // the position in this table is the type code
+    final DocValues.Type[] typesByCode = {
+      null,
+      DocValues.Type.VAR_INTS,
+      DocValues.Type.FLOAT_32,
+      DocValues.Type.FLOAT_64,
+      DocValues.Type.BYTES_FIXED_STRAIGHT,
+      DocValues.Type.BYTES_FIXED_DEREF,
+      DocValues.Type.BYTES_VAR_STRAIGHT,
+      DocValues.Type.BYTES_VAR_DEREF,
+      DocValues.Type.FIXED_INTS_16,
+      DocValues.Type.FIXED_INTS_32,
+      DocValues.Type.FIXED_INTS_64,
+      DocValues.Type.FIXED_INTS_8,
+      DocValues.Type.BYTES_FIXED_SORTED,
+      DocValues.Type.BYTES_VAR_SORTED
+    };
+    if (b < 0 || b >= typesByCode.length) {
+      throw new IllegalStateException("unhandled indexValues type " + b);
+    }
+    return typesByCode[b];
+  }
public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40FieldInfosWriter.FIELD_INFOS_EXTENSION));
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java Mon Jan 16 14:57:15 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene4
import java.io.IOException;
import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -35,15 +36,11 @@ public class Lucene40FieldInfosWriter ex
/** Extension of field infos */
static final String FIELD_INFOS_EXTENSION = "fnm";
- // First used in 2.9; prior to 2.9 there was no format header
- static final int FORMAT_START = -2;
- // First used in 3.4: omit only positional information
- static final int FORMAT_OMIT_POSITIONS = -3;
// per-field codec support, records index values for fields
- static final int FORMAT_FLEX = -4;
+ static final int FORMAT_START = -4;
// whenever you add a new format, make it 1 smaller (negative version logic)!
- static final int FORMAT_CURRENT = FORMAT_FLEX;
+ static final int FORMAT_CURRENT = FORMAT_START;
static final byte IS_INDEXED = 0x1;
static final byte STORE_TERMVECTOR = 0x2;
@@ -78,60 +75,53 @@ public class Lucene40FieldInfosWriter ex
output.writeInt(fi.number);
output.writeByte(bits);
- final byte b;
-
- if (!fi.hasDocValues()) {
- b = 0;
- } else {
- switch(fi.getDocValuesType()) {
- case VAR_INTS:
- b = 1;
- break;
- case FLOAT_32:
- b = 2;
- break;
- case FLOAT_64:
- b = 3;
- break;
- case BYTES_FIXED_STRAIGHT:
- b = 4;
- break;
- case BYTES_FIXED_DEREF:
- b = 5;
- break;
- case BYTES_VAR_STRAIGHT:
- b = 6;
- break;
- case BYTES_VAR_DEREF:
- b = 7;
- break;
- case FIXED_INTS_16:
- b = 8;
- break;
- case FIXED_INTS_32:
- b = 9;
- break;
- case FIXED_INTS_64:
- b = 10;
- break;
- case FIXED_INTS_8:
- b = 11;
- break;
- case BYTES_FIXED_SORTED:
- b = 12;
- break;
- case BYTES_VAR_SORTED:
- b = 13;
- break;
- default:
- throw new IllegalStateException("unhandled indexValues type " + fi.getDocValuesType());
- }
- }
- output.writeByte(b);
+ // pack the DV types in one byte
+ final byte dv = docValuesByte(fi.getDocValuesType());
+ final byte nrm = docValuesByte(fi.getNormType());
+ assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
+ byte val = (byte) (0xff & ((nrm << 4) | dv));
+ output.writeByte(val);
}
} finally {
output.close();
}
}
+
+  /**
+   * Encodes a doc values type as a one-byte type code.
+   *
+   * @param type the type to encode, may be <code>null</code>
+   * @return 0 for <code>null</code>, otherwise a code from 1 to 13
+   * @throws IllegalStateException if the type has no code assigned
+   */
+  public byte docValuesByte(Type type) {
+    if (type == null) {
+      return 0;
+    }
+    final int code;
+    switch (type) {
+      case VAR_INTS:
+        code = 1;
+        break;
+      case FLOAT_32:
+        code = 2;
+        break;
+      case FLOAT_64:
+        code = 3;
+        break;
+      case BYTES_FIXED_STRAIGHT:
+        code = 4;
+        break;
+      case BYTES_FIXED_DEREF:
+        code = 5;
+        break;
+      case BYTES_VAR_STRAIGHT:
+        code = 6;
+        break;
+      case BYTES_VAR_DEREF:
+        code = 7;
+        break;
+      case FIXED_INTS_16:
+        code = 8;
+        break;
+      case FIXED_INTS_32:
+        code = 9;
+        break;
+      case FIXED_INTS_64:
+        code = 10;
+        break;
+      case FIXED_INTS_8:
+        code = 11;
+        break;
+      case BYTES_FIXED_SORTED:
+        code = 12;
+        break;
+      case BYTES_VAR_SORTED:
+        code = 13;
+        break;
+      default:
+        throw new IllegalStateException("unhandled indexValues type " + type);
+    }
+    return (byte) code;
+  }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java Mon Jan 16 14:57:15 2012
@@ -72,12 +72,12 @@ public class Lucene40NormsFormat extends
@Override
protected boolean canLoad(FieldInfo info) {
- return !info.omitNorms && info.isIndexed;
+ return info.normsPresent();
}
@Override
protected Type getDocValuesType(FieldInfo info) {
- return Type.BYTES_FIXED_STRAIGHT;
+ return info.getNormType();
}
@Override
@@ -102,23 +102,24 @@ public class Lucene40NormsFormat extends
@Override
protected boolean canMerge(FieldInfo info) {
- return !info.omitNorms && info.isIndexed;
+ return info.normsPresent();
}
@Override
protected Type getDocValuesType(FieldInfo info) {
- return Type.BYTES_FIXED_STRAIGHT;
+ return info.getNormType();
}
public static void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
- // see the comment in all the other codecs... its bogus that we do fileExists here, but its
- // a harder problem since fieldinfos are never 'cleaned'
final String normsFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION);
- if (dir.fileExists(normsFileName)) {
- final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
- assert dir.fileExists(normsEntriesFileName);
- files.add(normsFileName);
- files.add(normsEntriesFileName);
+ FieldInfos fieldInfos = segmentInfo.getFieldInfos();
+ for (FieldInfo fieldInfo : fieldInfos) {
+ if (fieldInfo.normsPresent()) {
+ final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
+ files.add(normsFileName);
+ files.add(normsEntriesFileName);
+ return;
+ }
}
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java Mon Jan 16 14:57:15 2012
@@ -86,17 +86,18 @@ public class SimpleTextFieldInfosReader
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NORMS);
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
-
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch, NORMS_TYPE);
+ String nrmType = readString(NORMS_TYPE.length, scratch);
+ final DocValues.Type normsType = docValuesType(nrmType);
+
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, DOCVALUES);
String dvType = readString(DOCVALUES.length, scratch);
- final DocValues.Type docValuesType;
+ final DocValues.Type docValuesType = docValuesType(dvType);
+
- if ("false".equals(dvType)) {
- docValuesType = null;
- } else {
- docValuesType = DocValues.Type.valueOf(dvType);
- }
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, INDEXOPTIONS);
@@ -107,7 +108,7 @@ public class SimpleTextFieldInfosReader
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
- omitNorms, storePayloads, indexOptions, docValuesType);
+ omitNorms, storePayloads, indexOptions, docValuesType, normsType);
}
if (input.getFilePointer() != input.length()) {
@@ -119,6 +120,14 @@ public class SimpleTextFieldInfosReader
input.close();
}
}
+
+  /**
+   * Converts the textual form of a type into a {@link DocValues.Type}.
+   *
+   * @param dvType the text to convert; the literal <code>"false"</code> means no type
+   * @return <code>null</code> for <code>"false"</code>, otherwise the constant named by the text
+   */
+  public DocValues.Type docValuesType(String dvType) {
+    return "false".equals(dvType) ? null : DocValues.Type.valueOf(dvType);
+  }
private String readString(int offset, BytesRef scratch) {
return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java Mon Jan 16 14:57:15 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.simplet
import java.io.IOException;
import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -48,6 +49,7 @@ public class SimpleTextFieldInfosWriter
static final BytesRef STORETVOFF = new BytesRef(" term vector offsets ");
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
static final BytesRef NORMS = new BytesRef(" norms ");
+ static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
@@ -88,12 +90,12 @@ public class SimpleTextFieldInfosWriter
SimpleTextUtil.write(out, Boolean.toString(!fi.omitNorms), scratch);
SimpleTextUtil.writeNewline(out);
+ SimpleTextUtil.write(out, NORMS_TYPE);
+ SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
+ SimpleTextUtil.writeNewline(out);
+
SimpleTextUtil.write(out, DOCVALUES);
- if (!fi.hasDocValues()) {
- SimpleTextUtil.write(out, "false", scratch);
- } else {
- SimpleTextUtil.write(out, fi.getDocValuesType().toString(), scratch);
- }
+ SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, INDEXOPTIONS);
@@ -104,4 +106,8 @@ public class SimpleTextFieldInfosWriter
out.close();
}
}
+
+  /**
+   * Renders a type as text: <code>"false"</code> for <code>null</code>,
+   * otherwise the result of the type's <code>toString()</code>.
+   */
+  private static String getDocValuesType(DocValues.Type type) {
+    if (type == null) {
+      return "false";
+    }
+    return type.toString();
+  }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java Mon Jan 16 14:57:15 2012
@@ -94,17 +94,20 @@ public class SimpleTextNormsConsumer ext
@Override
protected boolean canMerge(FieldInfo info) {
- return !info.omitNorms && info.isIndexed;
+ return info.normsPresent();
}
@Override
protected Type getDocValuesType(FieldInfo info) {
- return Type.BYTES_FIXED_STRAIGHT;
+ return info.getNormType();
}
+ /**
+  * Creates the consumer that collects the norm values of one field.
+  *
+  * @param type the norm type; only {@link Type#FIXED_INTS_8} is accepted
+  * @param fieldInfo the field the norms belong to
+  * @throws UnsupportedOperationException if <code>type</code> is not {@link Type#FIXED_INTS_8}
+  */
@Override
public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo)
throws IOException {
+ if (type != Type.FIXED_INTS_8) {
+ throw new UnsupportedOperationException("Codec only supports single byte norm values. Type given: " + type);
+ }
return new SimpleTextNormsDocValuesConsumer(fieldInfo);
}
@@ -131,10 +134,10 @@ public class SimpleTextNormsConsumer ext
@Override
public void add(int docID, IndexableField docValue) throws IOException {
- add(docID, docValue.binaryValue());
+ add(docID, docValue.numericValue().longValue());
}
- protected void add(int docID, BytesRef value) throws IOException {
+ public void add(int docID, long value) {
if (docIDs.length <= upto) {
assert docIDs.length == upto;
docIDs = ArrayUtil.grow(docIDs, 1 + upto);
@@ -143,8 +146,8 @@ public class SimpleTextNormsConsumer ext
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1 + upto);
}
- assert value.length == 1;
- norms[upto] = value.bytes[value.offset];
+ norms[upto] = (byte) value;
+
docIDs[upto] = docID;
upto++;
}
@@ -281,7 +284,7 @@ public class SimpleTextNormsConsumer ext
FieldInfos fieldInfos = info.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
- if (!fieldInfo.omitNorms && fieldInfo.isIndexed) {
+ if (fieldInfo.normsPresent()) {
files.add(IndexFileNames.segmentFileName(info.name, "",
NORMS_EXTENSION));
break;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java Mon Jan 16 14:57:15 2012
@@ -32,6 +32,7 @@ import org.apache.lucene.codecs.PerDocPr
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
@@ -95,11 +96,12 @@ public class SimpleTextNormsProducer ext
}
static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- // TODO: This is what SI always did... but we can do this cleaner?
- // like first FI that has norms but doesn't have separate norms?
- final String normsFileName = IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION);
- if (dir.fileExists(normsFileName)) {
- files.add(normsFileName);
+ FieldInfos fieldInfos = info.getFieldInfos();
+ for (FieldInfo fieldInfo : fieldInfos) {
+ if (fieldInfo.normsPresent()) {
+ files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION));
+ break;
+ }
}
}
@@ -130,7 +132,7 @@ public class SimpleTextNormsProducer ext
@Override
public Type type() {
- return Type.BYTES_FIXED_STRAIGHT;
+ return Type.FIXED_INTS_8;
}
@Override
@@ -141,7 +143,7 @@ public class SimpleTextNormsProducer ext
static final class Norm extends Source {
protected Norm(byte[] bytes) {
- super(Type.BYTES_FIXED_STRAIGHT);
+ super(Type.FIXED_INTS_8);
this.bytes = bytes;
}
final byte bytes[];
@@ -153,6 +155,11 @@ public class SimpleTextNormsProducer ext
ref.length = 1;
return ref;
}
+
+ @Override
+ public long getInt(int docID) {
+ // byte -> long widening sign-extends, so a stored norm byte with the high bit set is returned as a negative value
+ return bytes[docID];
+ }
@Override
public boolean hasArray() {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java Mon Jan 16 14:57:15 2012
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.index.Norm;
import org.apache.lucene.util.BytesRef;
/**
@@ -383,13 +384,13 @@ public class Field implements IndexableF
* document.
*
* <p>The boost is used to compute the norm factor for the field. By
- * default, in the {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState)} method,
+ * default, in the {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState, Norm)} method,
* the boost value is multiplied by the length normalization factor and then
* rounded by {@link org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
- * @see org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState)
+ * @see org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState, Norm)
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/CheckIndex.java Mon Jan 16 14:57:15 2012
@@ -671,7 +671,7 @@ public class CheckIndex {
if (reader.normValues(info.name) != null) {
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
}
- if (info.isIndexed && !info.omitNorms) {
+ if (info.normsPresent()) {
throw new RuntimeException("field: " + info.name + " should have norms but omits them!");
}
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Mon Jan 16 14:57:15 2012
@@ -339,7 +339,7 @@ final class DocFieldProcessor extends Do
perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState);
}
DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo);
- fieldInfo.setDocValuesType(valueType);
+ fieldInfo.setDocValuesType(valueType, false);
docValuesConsumerAndDocID = new DocValuesConsumerAndDocID(docValuesConsumer);
docValuesConsumerAndDocID.docID = docState.docID;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java Mon Jan 16 14:57:15 2012
@@ -1,5 +1,7 @@
package org.apache.lucene.index;
+import org.apache.lucene.index.DocValues.Type;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,11 +26,12 @@ public final class FieldInfo {
public final int number;
public boolean isIndexed;
- private DocValues.Type docValues;
+ private DocValues.Type docValueType;
// True if any document indexed term vectors
public boolean storeTermVector;
+ private DocValues.Type normType;
public boolean omitNorms; // omit norms associated with indexed fields
public IndexOptions indexOptions;
public boolean storePayloads; // whether this field stores payloads together with term positions
@@ -56,21 +59,23 @@ public final class FieldInfo {
* @lucene.experimental
*/
public FieldInfo(String name, boolean isIndexed, int number, boolean storeTermVector,
- boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
+ boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normsType) {
this.name = name;
this.isIndexed = isIndexed;
this.number = number;
- this.docValues = docValues;
+ this.docValueType = docValues;
if (isIndexed) {
this.storeTermVector = storeTermVector;
this.storePayloads = storePayloads;
this.omitNorms = omitNorms;
this.indexOptions = indexOptions;
+ this.normType = !omitNorms ? normsType : null;
} else { // for non-indexed fields, leave defaults
this.storeTermVector = false;
this.storePayloads = false;
this.omitNorms = false;
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ this.normType = null;
}
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !storePayloads;
}
@@ -78,7 +83,7 @@ public final class FieldInfo {
@Override
public Object clone() {
return new FieldInfo(name, isIndexed, number, storeTermVector,
- omitNorms, storePayloads, indexOptions, docValues);
+ omitNorms, storePayloads, indexOptions, docValueType, normType);
}
// should only be called by FieldInfos#addOrUpdate
@@ -109,27 +114,44 @@ public final class FieldInfo {
assert this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads;
}
- void setDocValuesType(DocValues.Type v) {
- if (docValues == null) {
- docValues = v;
- }
- }
-
- public void resetDocValuesType(DocValues.Type v) {
- if (docValues != null) {
- docValues = v;
+ void setDocValuesType(DocValues.Type type, boolean force) {
+ if (docValueType == null || force) {
+ docValueType = type;
+ } else if (type != docValueType) {
+ throw new IllegalArgumentException("DocValues type already set to " + docValueType + " but was: " + type);
}
}
public boolean hasDocValues() {
- return docValues != null;
+ return docValueType != null;
}
public DocValues.Type getDocValuesType() {
- return docValues;
+ return docValueType;
+ }
+
+ public DocValues.Type getNormType() {
+ return normType;
}
public void setStoreTermVectors() {
storeTermVector = true;
}
+
+  /**
+   * Sets the norm type of this field.
+   * <p>
+   * The type is assigned if none is set yet or if <code>force</code> is
+   * <code>true</code>. Otherwise the call is a no-op when <code>type</code> equals
+   * the current norm type and fails when it differs.
+   *
+   * @param type the norm type to set
+   * @param force if <code>true</code>, overwrite a norm type that is already set
+   * @throws IllegalArgumentException if a different norm type is already set and
+   *         <code>force</code> is <code>false</code>
+   */
+  public void setNormValueType(Type type, boolean force) {
+    if (normType == null || force) {
+      normType = type;
+    } else if (type != normType) {
+      // report both the existing and the rejected type, mirroring setDocValuesType
+      throw new IllegalArgumentException("Norm type already set to " + normType + " but was: " + type);
+    }
+  }
+
+ /** Returns the current value of the <code>omitNorms</code> field. */
+ public boolean omitNorms() {
+ return omitNorms;
+ }
+
+  /**
+   * Returns <code>true</code> only if this field is indexed, does not omit
+   * norms, and has a norm type set.
+   */
+  public boolean normsPresent() {
+    if (!isIndexed || omitNorms) {
+      return false;
+    }
+    return normType != null;
+  }
+
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Mon Jan 16 14:57:15 2012
@@ -268,7 +268,7 @@ public final class FieldInfos implements
*/
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
boolean omitNorms) {
- addOrUpdate(name, isIndexed, storeTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
+ addOrUpdate(name, isIndexed, storeTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null);
}
/** If the field is not yet known, adds it. If it is known, checks to make
@@ -284,8 +284,8 @@ public final class FieldInfos implements
* @param indexOptions if term freqs should be omitted for this field
*/
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
- boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
- return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues);
+ boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normType) {
+ return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
}
// NOTE: this method does not carry over termVector
@@ -301,32 +301,37 @@ public final class FieldInfos implements
// be updated by maybe FreqProxTermsWriterPerField:
return addOrUpdateInternal(name, -1, fieldType.indexed(), false,
fieldType.omitNorms(), false,
- fieldType.indexOptions(), null);
+ fieldType.indexOptions(), null, null);
}
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
boolean storeTermVector,
- boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
+ boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normType) {
if (globalFieldNumbers == null) {
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
}
FieldInfo fi = fieldInfo(name);
if (fi == null) {
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
- fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues);
+ fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
} else {
fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
- fi.setDocValuesType(docValues);
+ if (docValues != null) {
+ fi.setDocValuesType(docValues, true);
+ }
+ if (normType != null) {
+ fi.setNormValueType(normType, true);
+ }
}
version++;
return fi;
}
-
+
synchronized public FieldInfo add(FieldInfo fi) {
// IMPORTANT - reuse the field number if possible for consistent field numbers across segments
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
fi.omitNorms, fi.storePayloads,
- fi.indexOptions, fi.getDocValuesType());
+ fi.indexOptions, fi.getDocValuesType(), fi.getNormType());
}
/*
@@ -334,12 +339,12 @@ public final class FieldInfos implements
*/
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
boolean storeTermVector, boolean omitNorms, boolean storePayloads,
- IndexOptions indexOptions, DocValues.Type docValuesType) {
+ IndexOptions indexOptions, DocValues.Type docValuesType, DocValues.Type normType) {
// don't check modifiable here since we use that to initially build up FIs
if (globalFieldNumbers != null) {
globalFieldNumbers.setIfNotSet(fieldNumber, name);
}
- final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
+ final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normType);
putInternal(fi);
return fi;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocValues.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocValues.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocValues.java Mon Jan 16 14:57:15 2012
@@ -49,8 +49,9 @@ public class MultiDocValues extends DocV
public boolean stopLoadingOnNull(IndexReader reader, String field) throws IOException {
// for norms we drop all norms if one leaf reader has no norms and the field is present
- Fields fields = reader.fields();
- return (fields != null && fields.terms(field) != null);
+ FieldInfos fieldInfos = reader.getFieldInfos();
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ return fieldInfo != null && fieldInfo.omitNorms;
}
};
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Norm.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Norm.java?rev=1232014&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Norm.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Norm.java Mon Jan 16 14:57:15 2012
@@ -0,0 +1,154 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.document.DocValuesField;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Stores the normalization value computed in
+ * {@link Similarity#computeNorm(FieldInvertState, Norm)} per field.
+ * Normalization values must be consistent within a single field; different
+ * value types are not permitted within a single field. All values set must be
+ * fixed-size values, i.e. all values passed to {@link Norm#setBytes(BytesRef)}
+ * must have the same length per field.
+ *
+ * @lucene.experimental
+ * @lucene.internal
+ */
+public final class Norm {
+ private DocValuesField field;
+ private BytesRef spare;
+
+ /**
+ * Returns the {@link IndexableField} representation for this norm
+ */
+ public IndexableField field() {
+ return field;
+ }
+
+ /**
+ * Returns the {@link Type} for this norm.
+ */
+ public Type type() {
+ return field == null? null : field.fieldType().docValueType();
+ }
+
+ /**
+ * Returns a spare {@link BytesRef}
+ */
+ public BytesRef getSpare() {
+ if (spare == null) {
+ spare = new BytesRef();
+ }
+ return spare;
+ }
+
+ /**
+ * Sets a float norm value
+ */
+ public void setFloat(float norm) {
+ setType(Type.FLOAT_32);
+ this.field.setValue(norm);
+ }
+
+ /**
+ * Sets a double norm value
+ */
+ public void setDouble(double norm) {
+ setType(Type.FLOAT_64);
+ this.field.setValue(norm);
+ }
+
+ /**
+ * Sets a short norm value
+ */
+ public void setShort(short norm) {
+ setType(Type.FIXED_INTS_16);
+ this.field.setValue(norm);
+
+ }
+
+ /**
+ * Sets an int norm value
+ */
+ public void setInt(int norm) {
+ setType(Type.FIXED_INTS_32);
+ this.field.setValue(norm);
+ }
+
+ /**
+ * Sets a long norm value
+ */
+ public void setLong(long norm) {
+ setType(Type.FIXED_INTS_64);
+ this.field.setValue(norm);
+ }
+
+ /**
+ * Sets a byte norm value
+ */
+ public void setByte(byte norm) {
+ setType(Type.FIXED_INTS_8);
+ this.field.setValue(norm);
+ }
+
+ /**
+ * Sets a fixed byte array norm value
+ */
+ public void setBytes(BytesRef norm) {
+ setType(Type.BYTES_FIXED_STRAIGHT);
+ this.field.setValue(norm);
+ }
+
+
+ private void setType(Type type) {
+ if (field != null) {
+ if (type != field.fieldType().docValueType()) {
+ throw new IllegalArgumentException("FieldType missmatch - expected "+type+" but was " + field.fieldType().docValueType());
+ }
+ } else {
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ this.field = new DocValuesField("", new BytesRef(), type);
+ break;
+
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ this.field = new DocValuesField("", 0, type);
+ break;
+ case FLOAT_32:
+ case FLOAT_64:
+ this.field = new DocValuesField("", 0f, type);
+ break;
+ default:
+ throw new IllegalArgumentException("unknown Type: " + type);
+ }
+ }
+ }
+
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumer.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumer.java Mon Jan 16 14:57:15 2012
@@ -24,9 +24,7 @@ import java.util.Map;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.PerDocConsumer;
-import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.index.DocValues.Type;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
// TODO FI: norms could actually be stored as doc store
@@ -69,13 +67,12 @@ final class NormsConsumer extends Invert
if (!fi.omitNorms) {
if (toWrite != null && toWrite.initialized()) {
anythingFlushed = true;
- toWrite.flush(state.numDocs);
+ final Type type = toWrite.flush(state.numDocs);
+ assert fi.getNormType() == type;
} else if (fi.isIndexed) {
anythingFlushed = true;
- final DocValuesConsumer valuesConsumer = newConsumer(new PerDocWriteState(state), fi);
- final DocValuesField value = new DocValuesField("", new BytesRef(new byte[] {0x0}), Type.BYTES_FIXED_STRAIGHT);
- valuesConsumer.add(state.numDocs-1, value);
- valuesConsumer.finish(state.numDocs);
+ assert fi.getNormType() == null;
+ fi.setNormValueType(null, false);
}
}
}
@@ -107,12 +104,12 @@ final class NormsConsumer extends Invert
}
DocValuesConsumer newConsumer(PerDocWriteState perDocWriteState,
- FieldInfo fieldInfo) throws IOException {
+ FieldInfo fieldInfo, Type type) throws IOException {
if (consumer == null) {
consumer = normsFormat.docsConsumer(perDocWriteState);
}
- DocValuesConsumer addValuesField = consumer.addValuesField(
- Type.BYTES_FIXED_STRAIGHT, fieldInfo);
+ DocValuesConsumer addValuesField = consumer.addValuesField(type, fieldInfo);
return addValuesField;
}
+
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java?rev=1232014&r1=1232013&r2=1232014&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java Mon Jan 16 14:57:15 2012
@@ -19,6 +19,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.document.DocValuesField;
+import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
@@ -29,9 +30,9 @@ public class NormsConsumerPerField exten
private final Similarity similarity;
private final FieldInvertState fieldState;
private DocValuesConsumer consumer;
- private final BytesRef spare = new BytesRef(1);
- private final DocValuesField value = new DocValuesField("", spare, Type.BYTES_FIXED_STRAIGHT);
+ private final Norm norm;
private final NormsConsumer parent;
+ private Type initType;
public NormsConsumerPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo, NormsConsumer parent) {
this.fieldInfo = fieldInfo;
@@ -39,10 +40,9 @@ public class NormsConsumerPerField exten
docState = docInverterPerField.docState;
fieldState = docInverterPerField.fieldState;
similarity = docState.similarityProvider.get(fieldInfo.name);
- spare.length = 1;
- spare.offset = 0;
-
+ norm = new Norm();
}
+
@Override
public int compareTo(NormsConsumerPerField other) {
return fieldInfo.name.compareTo(other.fieldInfo.name);
@@ -51,20 +51,33 @@ public class NormsConsumerPerField exten
@Override
void finish() throws IOException {
if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
- DocValuesConsumer consumer = getConsumer();
- spare.bytes[0] = similarity.computeNorm(fieldState);
- consumer.add(docState.docID, value);
+ similarity.computeNorm(fieldState, norm);
+
+ if (norm.type() != null) {
+ IndexableField field = norm.field();
+ // some similarities might not compute any norms
+ DocValuesConsumer consumer = getConsumer(norm.type());
+ consumer.add(docState.docID, field);
+ }
}
}
- void flush(int docCount) throws IOException {
- assert initialized();
+ Type flush(int docCount) throws IOException {
+ if (!initialized()) {
+ return null; // null type - not omitted but not written
+ }
consumer.finish(docCount);
+ return initType;
}
- private DocValuesConsumer getConsumer() throws IOException {
+ private DocValuesConsumer getConsumer(Type type) throws IOException {
if (consumer == null) {
- consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo);
+ fieldInfo.setNormValueType(type, false);
+ consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo, type);
+ this.initType = type;
+ }
+ if (initType != type) {
+ throw new IllegalArgumentException("NormTypes for field: " + fieldInfo.name + " doesn't match " + initType + " != " + type);
}
return consumer;
}