You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/13 13:17:06 UTC
svn commit: r1372366 [4/8] - in /lucene/dev/branches/pforcodec_3892: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ ...
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Mon Aug 13 11:16:57 2012
@@ -32,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -44,6 +43,7 @@ import org.apache.lucene.store.RAMOutput
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
@@ -124,36 +124,14 @@ public class DirectPostingsFormat extend
private final Map<String,DirectField> fields = new TreeMap<String,DirectField>();
public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff) throws IOException {
- FieldsEnum fieldsEnum = fields.iterator();
- String field;
- while ((field = fieldsEnum.next()) != null) {
- this.fields.put(field, new DirectField(state, field, fieldsEnum.terms(), minSkipCount, lowFreqCutoff));
+ for (String field : fields) {
+ this.fields.put(field, new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff));
}
}
@Override
- public FieldsEnum iterator() {
-
- final Iterator<Map.Entry<String,DirectField>> iter = fields.entrySet().iterator();
-
- return new FieldsEnum() {
- Map.Entry<String,DirectField> current;
-
- @Override
- public String next() {
- if (iter.hasNext()) {
- current = iter.next();
- return current.getKey();
- } else {
- return null;
- }
- }
-
- @Override
- public Terms terms() {
- return current.getValue();
- }
- };
+ public Iterator<String> iterator() {
+ return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
@@ -348,9 +326,8 @@ public class DirectPostingsFormat extend
scratch.add(docsAndPositionsEnum.endOffset());
}
if (hasPayloads) {
- final BytesRef payload;
- if (docsAndPositionsEnum.hasPayload()) {
- payload = docsAndPositionsEnum.getPayload();
+ final BytesRef payload = docsAndPositionsEnum.getPayload();
+ if (payload != null) {
scratch.add(payload.length);
ros.writeBytes(payload.bytes, payload.offset, payload.length);
} else {
@@ -421,9 +398,8 @@ public class DirectPostingsFormat extend
for(int pos=0;pos<freq;pos++) {
positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
if (hasPayloads) {
- if (docsAndPositionsEnum.hasPayload()) {
- BytesRef payload = docsAndPositionsEnum.getPayload();
- assert payload != null;
+ BytesRef payload = docsAndPositionsEnum.getPayload();
+ if (payload != null) {
byte[] payloadBytes = new byte[payload.length];
System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
payloads[upto][pos] = payloadBytes;
@@ -635,6 +611,21 @@ public class DirectPostingsFormat extend
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+ @Override
+ public boolean hasOffsets() {
+ return hasOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return hasPos;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
+
private final class DirectTermsEnum extends TermsEnum {
private final BytesRef scratch = new BytesRef();
@@ -1792,17 +1783,11 @@ public class DirectPostingsFormat extend
}
@Override
- public boolean hasPayload() {
- return payloadLength > 0;
- }
-
- @Override
public BytesRef getPayload() {
if (payloadLength > 0) {
payload.bytes = payloadBytes;
payload.offset = lastPayloadOffset;
payload.length = payloadLength;
- payloadLength = 0;
return payload;
} else {
return null;
@@ -1995,7 +1980,6 @@ public class DirectPostingsFormat extend
private int upto;
private int docID = -1;
private int posUpto;
- private boolean gotPayload;
private int[] curPositions;
public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
@@ -2065,7 +2049,6 @@ public class DirectPostingsFormat extend
@Override
public int nextPosition() {
posUpto += posJump;
- gotPayload = false;
return curPositions[posUpto];
}
@@ -2199,21 +2182,22 @@ public class DirectPostingsFormat extend
}
}
- @Override
- public boolean hasPayload() {
- return !gotPayload && payloads != null && payloads[upto][posUpto/(hasOffsets ? 3 : 1)] != null;
- }
-
private final BytesRef payload = new BytesRef();
@Override
public BytesRef getPayload() {
- final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
- payload.bytes = payloadBytes;
- payload.length = payloadBytes.length;
- payload.offset = 0;
- gotPayload = true;
- return payload;
+ if (payloads == null) {
+ return null;
+ } else {
+ final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
+ if (payloadBytes == null) {
+ return null;
+ }
+ payload.bytes = payloadBytes;
+ payload.length = payloadBytes.length;
+ payload.offset = 0;
+ return payload;
+ }
}
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Mon Aug 13 11:16:57 2012
@@ -34,7 +34,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@@ -49,6 +48,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
@@ -446,7 +446,6 @@ public class MemoryPostingsFormat extend
private int numDocs;
private int posPending;
private int payloadLength;
- private boolean payloadRetrieved;
final boolean storeOffsets;
int offsetLength;
int startOffset;
@@ -484,7 +483,6 @@ public class MemoryPostingsFormat extend
payloadLength = 0;
this.numDocs = numDocs;
posPending = 0;
- payloadRetrieved = false;
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
offsetLength = 0;
return this;
@@ -577,10 +575,6 @@ public class MemoryPostingsFormat extend
payload.offset = in.getPosition();
in.skipBytes(payloadLength);
payload.length = payloadLength;
- // Necessary, in case caller changed the
- // payload.bytes from prior call:
- payload.bytes = buffer;
- payloadRetrieved = false;
}
//System.out.println(" pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
@@ -599,13 +593,7 @@ public class MemoryPostingsFormat extend
@Override
public BytesRef getPayload() {
- payloadRetrieved = true;
- return payload;
- }
-
- @Override
- public boolean hasPayload() {
- return !payloadRetrieved && payload.length > 0;
+ return payload.length > 0 ? payload : null;
}
@Override
@@ -834,6 +822,21 @@ public class MemoryPostingsFormat extend
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+
+ @Override
+ public boolean hasOffsets() {
+ return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return field.hasPayloads();
+ }
}
@Override
@@ -859,24 +862,8 @@ public class MemoryPostingsFormat extend
return new FieldsProducer() {
@Override
- public FieldsEnum iterator() {
- final Iterator<TermsReader> iter = fields.values().iterator();
-
- return new FieldsEnum() {
-
- private TermsReader current;
-
- @Override
- public String next() {
- current = iter.next();
- return current.field.name;
- }
-
- @Override
- public Terms terms() {
- return current;
- }
- };
+ public Iterator<String> iterator() {
+ return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Mon Aug 13 11:16:57 2012
@@ -30,11 +30,11 @@ import org.apache.lucene.codecs.FieldsPr
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.UnmodifiableIterator;
/**
* Enables per field format support.
@@ -197,34 +197,9 @@ public abstract class PerFieldPostingsFo
}
}
- private final class FieldsIterator extends FieldsEnum {
- private final Iterator<String> it;
- private String current;
-
- public FieldsIterator() {
- it = fields.keySet().iterator();
- }
-
- @Override
- public String next() {
- if (it.hasNext()) {
- current = it.next();
- } else {
- current = null;
- }
-
- return current;
- }
-
- @Override
- public Terms terms() throws IOException {
- return fields.get(current).terms(current);
- }
- }
-
@Override
- public FieldsEnum iterator() throws IOException {
- return new FieldsIterator();
+ public Iterator<String> iterator() {
+ return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java Mon Aug 13 11:16:57 2012
@@ -533,18 +533,12 @@ public class PulsingPostingsReader exten
}
@Override
- public boolean hasPayload() {
- return storePayloads && !payloadRetrieved && payloadLength > 0;
- }
-
- @Override
public BytesRef getPayload() throws IOException {
//System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this);
if (payloadRetrieved) {
- throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
- }
- payloadRetrieved = true;
- if (payloadLength > 0) {
+ return payload;
+ } else if (storePayloads && payloadLength > 0) {
+ payloadRetrieved = true;
if (payload == null) {
payload = new BytesRef(payloadLength);
} else {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java Mon Aug 13 11:16:57 2012
@@ -714,7 +714,11 @@ public class SepPostingsReader extends P
@Override
public BytesRef getPayload() throws IOException {
if (!payloadPending) {
- throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+ return null;
+ }
+
+ if (pendingPayloadBytes == 0) {
+ return payload;
}
assert pendingPayloadBytes >= payloadLength;
@@ -731,15 +735,9 @@ public class SepPostingsReader extends P
}
payloadIn.readBytes(payload.bytes, 0, payloadLength);
- payloadPending = false;
payload.length = payloadLength;
pendingPayloadBytes = 0;
return payload;
}
-
- @Override
- public boolean hasPayload() {
- return payloadPending && payloadLength > 0;
- }
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Mon Aug 13 11:16:57 2012
@@ -20,14 +20,17 @@ package org.apache.lucene.codecs.simplet
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.Map;
+import java.util.TreeMap;
+import java.util.TreeSet;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -40,6 +43,7 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
@@ -48,7 +52,7 @@ import org.apache.lucene.util.fst.Positi
import org.apache.lucene.util.fst.Util;
class SimpleTextFieldsReader extends FieldsProducer {
-
+ private final TreeMap<String,Long> fields;
private final IndexInput in;
private final FieldInfos fieldInfos;
@@ -66,35 +70,22 @@ class SimpleTextFieldsReader extends Fie
in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
fieldInfos = state.fieldInfos;
+ fields = readFields((IndexInput)in.clone());
}
-
- private class SimpleTextFieldsEnum extends FieldsEnum {
- private final IndexInput in;
- private final BytesRef scratch = new BytesRef(10);
- private String current;
-
- public SimpleTextFieldsEnum() {
- this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
- }
-
- @Override
- public String next() throws IOException {
- while(true) {
- SimpleTextUtil.readLine(in, scratch);
- if (scratch.equals(END)) {
- current = null;
- return null;
- }
- if (StringHelper.startsWith(scratch, FIELD)) {
- return current = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
- }
+
+ private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
+ BytesRef scratch = new BytesRef(10);
+ TreeMap<String,Long> fields = new TreeMap<String,Long>();
+
+ while (true) {
+ SimpleTextUtil.readLine(in, scratch);
+ if (scratch.equals(END)) {
+ return fields;
+ } else if (StringHelper.startsWith(scratch, FIELD)) {
+ String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
+ fields.put(fieldName, in.getFilePointer());
}
}
-
- @Override
- public Terms terms() throws IOException {
- return SimpleTextFieldsReader.this.terms(current);
- }
}
private class SimpleTextTermsEnum extends TermsEnum {
@@ -471,18 +462,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public BytesRef getPayload() {
- // Some tests rely on only being able to retrieve the
- // payload once
- try {
- return payload;
- } finally {
- payload = null;
- }
- }
-
- @Override
- public boolean hasPayload() {
- return payload != null;
+ return payload;
}
}
@@ -498,7 +478,7 @@ class SimpleTextFieldsReader extends Fie
private class SimpleTextTerms extends Terms {
private final long termsStart;
- private final IndexOptions indexOptions;
+ private final FieldInfo fieldInfo;
private long sumTotalTermFreq;
private long sumDocFreq;
private int docCount;
@@ -509,7 +489,7 @@ class SimpleTextFieldsReader extends Fie
public SimpleTextTerms(String field, long termsStart) throws IOException {
this.termsStart = termsStart;
- indexOptions = fieldInfos.fieldInfo(field).getIndexOptions();
+ fieldInfo = fieldInfos.fieldInfo(field);
loadTerms();
}
@@ -579,7 +559,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
if (fst != null) {
- return new SimpleTextTermsEnum(fst, indexOptions);
+ return new SimpleTextTermsEnum(fst, fieldInfo.getIndexOptions());
} else {
return TermsEnum.EMPTY;
}
@@ -597,7 +577,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public long getSumTotalTermFreq() {
- return indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
+ return fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
}
@Override
@@ -609,11 +589,26 @@ class SimpleTextFieldsReader extends Fie
public int getDocCount() throws IOException {
return docCount;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return fieldInfo.hasPayloads();
+ }
}
@Override
- public FieldsEnum iterator() throws IOException {
- return new SimpleTextFieldsEnum();
+ public Iterator<String> iterator() {
+ return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
@@ -622,15 +617,13 @@ class SimpleTextFieldsReader extends Fie
synchronized public Terms terms(String field) throws IOException {
Terms terms = termsCache.get(field);
if (terms == null) {
- SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
- String fieldUpto;
- while((fieldUpto = fe.next()) != null) {
- if (fieldUpto.equals(field)) {
- terms = new SimpleTextTerms(field, fe.in.getFilePointer());
- break;
- }
+ Long fp = fields.get(field);
+ if (fp == null) {
+ return null;
+ } else {
+ terms = new SimpleTextTerms(field, fp);
+ termsCache.put(field, terms);
}
- termsCache.put(field, terms);
}
return terms;
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Mon Aug 13 11:16:57 2012
@@ -29,7 +29,6 @@ import org.apache.lucene.codecs.TermVect
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
@@ -45,6 +44,7 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.UnmodifiableIterator;
import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
@@ -127,10 +127,14 @@ public class SimpleTextTermVectorsReader
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
readLine();
+ assert StringHelper.startsWith(scratch, FIELDPAYLOADS);
+ boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));
+
+ readLine();
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
int termCount = parseIntAt(FIELDTERMCOUNT.length);
- SimpleTVTerms terms = new SimpleTVTerms();
+ SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
fields.put(fieldName, terms);
for (int j = 0; j < termCount; j++) {
@@ -152,6 +156,9 @@ public class SimpleTextTermVectorsReader
if (positions || offsets) {
if (positions) {
postings.positions = new int[postings.freq];
+ if (payloads) {
+ postings.payloads = new BytesRef[postings.freq];
+ }
}
if (offsets) {
@@ -164,6 +171,17 @@ public class SimpleTextTermVectorsReader
readLine();
assert StringHelper.startsWith(scratch, POSITION);
postings.positions[k] = parseIntAt(POSITION.length);
+ if (payloads) {
+ readLine();
+ assert StringHelper.startsWith(scratch, PAYLOAD);
+ if (scratch.length - PAYLOAD.length == 0) {
+ postings.payloads[k] = null;
+ } else {
+ byte payloadBytes[] = new byte[scratch.length - PAYLOAD.length];
+ System.arraycopy(scratch.bytes, scratch.offset+PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
+ postings.payloads[k] = new BytesRef(payloadBytes);
+ }
+ }
}
if (offsets) {
@@ -222,26 +240,8 @@ public class SimpleTextTermVectorsReader
}
@Override
- public FieldsEnum iterator() throws IOException {
- return new FieldsEnum() {
- private Iterator<Map.Entry<String,SimpleTVTerms>> iterator = fields.entrySet().iterator();
- private Map.Entry<String,SimpleTVTerms> current = null;
-
- @Override
- public String next() {
- if (!iterator.hasNext()) {
- return null;
- } else {
- current = iterator.next();
- return current.getKey();
- }
- }
-
- @Override
- public Terms terms() {
- return current.getValue();
- }
- };
+ public Iterator<String> iterator() {
+ return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
@@ -257,8 +257,14 @@ public class SimpleTextTermVectorsReader
private static class SimpleTVTerms extends Terms {
final SortedMap<BytesRef,SimpleTVPostings> terms;
+ final boolean hasOffsets;
+ final boolean hasPositions;
+ final boolean hasPayloads;
- SimpleTVTerms() {
+ SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
+ this.hasOffsets = hasOffsets;
+ this.hasPositions = hasPositions;
+ this.hasPayloads = hasPayloads;
terms = new TreeMap<BytesRef,SimpleTVPostings>();
}
@@ -292,6 +298,21 @@ public class SimpleTextTermVectorsReader
public int getDocCount() throws IOException {
return 1;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return hasOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return hasPositions;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
}
private static class SimpleTVPostings {
@@ -299,6 +320,7 @@ public class SimpleTextTermVectorsReader
private int positions[];
private int startOffsets[];
private int endOffsets[];
+ private BytesRef payloads[];
}
private static class SimpleTVTermsEnum extends TermsEnum {
@@ -372,7 +394,7 @@ public class SimpleTextTermVectorsReader
}
// TODO: reuse
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
- e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
+ e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
@@ -433,6 +455,7 @@ public class SimpleTextTermVectorsReader
private int nextPos;
private Bits liveDocs;
private int[] positions;
+ private BytesRef[] payloads;
private int[] startOffsets;
private int[] endOffsets;
@@ -470,11 +493,12 @@ public class SimpleTextTermVectorsReader
}
}
- public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
+ public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef payloads[]) {
this.liveDocs = liveDocs;
this.positions = positions;
this.startOffsets = startOffsets;
this.endOffsets = endOffsets;
+ this.payloads = payloads;
this.doc = -1;
didNext = false;
nextPos = 0;
@@ -482,12 +506,7 @@ public class SimpleTextTermVectorsReader
@Override
public BytesRef getPayload() {
- return null;
- }
-
- @Override
- public boolean hasPayload() {
- return false;
+ return payloads == null ? null : payloads[nextPos-1];
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java Mon Aug 13 11:16:57 2012
@@ -45,10 +45,12 @@ public class SimpleTextTermVectorsWriter
static final BytesRef FIELDNAME = new BytesRef(" name ");
static final BytesRef FIELDPOSITIONS = new BytesRef(" positions ");
static final BytesRef FIELDOFFSETS = new BytesRef(" offsets ");
+ static final BytesRef FIELDPAYLOADS = new BytesRef(" payloads ");
static final BytesRef FIELDTERMCOUNT = new BytesRef(" numterms ");
static final BytesRef TERMTEXT = new BytesRef(" term ");
static final BytesRef TERMFREQ = new BytesRef(" freq ");
static final BytesRef POSITION = new BytesRef(" position ");
+ static final BytesRef PAYLOAD = new BytesRef(" payload ");
static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
@@ -61,6 +63,7 @@ public class SimpleTextTermVectorsWriter
private final BytesRef scratch = new BytesRef();
private boolean offsets;
private boolean positions;
+ private boolean payloads;
public SimpleTextTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
this.directory = directory;
@@ -89,7 +92,7 @@ public class SimpleTextTermVectorsWriter
}
@Override
- public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
+ public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
write(FIELD);
write(Integer.toString(info.number));
newLine();
@@ -106,12 +109,17 @@ public class SimpleTextTermVectorsWriter
write(Boolean.toString(offsets));
newLine();
+ write(FIELDPAYLOADS);
+ write(Boolean.toString(payloads));
+ newLine();
+
write(FIELDTERMCOUNT);
write(Integer.toString(numTerms));
newLine();
this.positions = positions;
this.offsets = offsets;
+ this.payloads = payloads;
}
@Override
@@ -126,13 +134,22 @@ public class SimpleTextTermVectorsWriter
}
@Override
- public void addPosition(int position, int startOffset, int endOffset) throws IOException {
+ public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
assert positions || offsets;
if (positions) {
write(POSITION);
write(Integer.toString(position));
newLine();
+
+ if (payloads) {
+ write(PAYLOAD);
+ if (payload != null) {
+ assert payload.length > 0;
+ write(payload);
+ }
+ newLine();
+ }
}
if (offsets) {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/document/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/document/FieldType.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/document/FieldType.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/document/FieldType.java Mon Aug 13 11:16:57 2012
@@ -39,6 +39,7 @@ public class FieldType implements Indexa
private boolean storeTermVectors;
private boolean storeTermVectorOffsets;
private boolean storeTermVectorPositions;
+ private boolean storeTermVectorPayloads;
private boolean omitNorms;
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
private DocValues.Type docValueType;
@@ -53,6 +54,7 @@ public class FieldType implements Indexa
this.storeTermVectors = ref.storeTermVectors();
this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
this.storeTermVectorPositions = ref.storeTermVectorPositions();
+ this.storeTermVectorPayloads = ref.storeTermVectorPayloads();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValueType = ref.docValueType();
@@ -132,6 +134,15 @@ public class FieldType implements Indexa
this.storeTermVectorPositions = value;
}
+ public boolean storeTermVectorPayloads() {
+ return this.storeTermVectorPayloads;
+ }
+
+ public void setStoreTermVectorPayloads(boolean value) {
+ checkIfFrozen();
+ this.storeTermVectorPayloads = value;
+ }
+
public boolean omitNorms() {
return this.omitNorms;
}
@@ -198,24 +209,19 @@ public class FieldType implements Indexa
result.append(",");
result.append("indexed");
if (tokenized()) {
- if (result.length() > 0)
- result.append(",");
- result.append("tokenized");
+ result.append(",tokenized");
}
if (storeTermVectors()) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVector");
+ result.append(",termVector");
}
if (storeTermVectorOffsets()) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorOffsets");
+ result.append(",termVectorOffsets");
}
if (storeTermVectorPositions()) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorPosition");
+ result.append(",termVectorPosition");
+ if (storeTermVectorPayloads()) {
+ result.append(",termVectorPayloads");
+ }
}
if (omitNorms()) {
result.append(",omitNorms");
@@ -232,7 +238,9 @@ public class FieldType implements Indexa
}
}
if (docValueType != null) {
- result.append(",docValueType=");
+ if (result.length() > 0)
+ result.append(",");
+ result.append("docValueType=");
result.append(docValueType);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Mon Aug 13 11:16:57 2012
@@ -685,12 +685,7 @@ public class CheckIndex {
DocsAndPositionsEnum postings = null;
String lastField = null;
- final FieldsEnum fieldsEnum = fields.iterator();
- while(true) {
- final String field = fieldsEnum.next();
- if (field == null) {
- break;
- }
+ for (String field : fields) {
// MultiFieldsEnum relies upon this order...
if (lastField != null && field.compareTo(lastField) <= 0) {
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
@@ -713,11 +708,16 @@ public class CheckIndex {
// assert fields.terms(field) != null;
computedFieldCount++;
- final Terms terms = fieldsEnum.terms();
+ final Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
+ final boolean hasPositions = terms.hasPositions();
+ final boolean hasOffsets = terms.hasOffsets();
+ // term vectors cannot omit TF
+ final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+
final TermsEnum termsEnum = terms.iterator(null);
boolean hasOrd = true;
@@ -777,17 +777,10 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
- final boolean hasPositions;
- // if we are checking vectors, we have freqs implicitly
- final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- // if we are checking vectors, offsets are a free-for-all anyway
- final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (postings != null) {
docs2 = postings;
- hasPositions = true;
} else {
docs2 = docs;
- hasPositions = false;
}
int lastDoc = -1;
@@ -824,22 +817,17 @@ public class CheckIndex {
if (hasPositions) {
for(int j=0;j<freq;j++) {
final int pos = postings.nextPosition();
- // NOTE: pos=-1 is allowed because of ancient bug
- // (LUCENE-1542) whereby IndexWriter could
- // write pos=-1 when first token's posInc is 0
- // (separately: analyzers should not give
- // posInc=0 to first token); also, term
- // vectors are allowed to return pos=-1 if
- // they indexed offset but not positions:
- if (pos < -1) {
+
+ if (pos < 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
}
if (pos < lastPos) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
}
lastPos = pos;
- if (postings.hasPayload()) {
- postings.getPayload();
+ BytesRef payload = postings.getPayload();
+ if (payload != null && payload.length < 1) {
+ throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.length);
}
if (hasOffsets) {
int startOffset = postings.startOffset();
@@ -924,14 +912,8 @@ public class CheckIndex {
int lastOffset = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = postings.nextPosition();
- // NOTE: pos=-1 is allowed because of ancient bug
- // (LUCENE-1542) whereby IndexWriter could
- // write pos=-1 when first token's posInc is 0
- // (separately: analyzers should not give
- // posInc=0 to first token); also, term
- // vectors are allowed to return pos=-1 if
- // they indexed offset but not positions:
- if (pos < -1) {
+
+ if (pos < 0) {
throw new RuntimeException("position " + pos + " is out of bounds");
}
if (pos < lastPosition) {
@@ -1000,11 +982,7 @@ public class CheckIndex {
// only happen if it's a ghost field (field with
// no terms, eg there used to be terms but all
// docs got deleted and then merged away):
- // make sure TermsEnum is empty:
- final Terms fieldTerms2 = fieldsEnum.terms();
- if (fieldTerms2 != null && fieldTerms2.iterator(null).next() != null) {
- throw new RuntimeException("Fields.terms(field=" + field + ") returned null yet the field appears to have terms");
- }
+
} else {
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
@@ -1415,9 +1393,7 @@ public class CheckIndex {
status.docCount++;
}
- FieldsEnum fieldsEnum = tfv.iterator();
- String field = null;
- while((field = fieldsEnum.next()) != null) {
+ for(String field : tfv) {
if (doStats) {
status.totVectors++;
}
@@ -1432,6 +1408,8 @@ public class CheckIndex {
Terms terms = tfv.terms(field);
termsEnum = terms.iterator(termsEnum);
final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final boolean postingsHasPayload = fieldInfo.hasPayloads();
+ final boolean vectorsHasPayload = terms.hasPayloads();
Terms postingsTerms = postingsFields.terms(field);
if (postingsTerms == null) {
@@ -1439,19 +1417,18 @@ public class CheckIndex {
}
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
+ final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
BytesRef term = null;
while ((term = termsEnum.next()) != null) {
-
- final boolean hasProx;
- // Try positions:
- postings = termsEnum.docsAndPositions(null, postings);
- if (postings == null) {
- hasProx = false;
- // Try docIDs & freqs:
- docs = termsEnum.docs(null, docs);
+ if (hasProx) {
+ postings = termsEnum.docsAndPositions(null, postings);
+ assert postings != null;
+ docs = null;
} else {
- hasProx = true;
+ docs = termsEnum.docs(null, docs);
+ assert docs != null;
+ postings = null;
}
final DocsEnum docs2;
@@ -1504,7 +1481,7 @@ public class CheckIndex {
int pos = postings.nextPosition();
if (postingsPostings != null) {
int postingsPos = postingsPostings.nextPosition();
- if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
+ if (terms.hasPositions() && pos != postingsPos) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
}
}
@@ -1535,6 +1512,34 @@ public class CheckIndex {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
}
}
+
+ BytesRef payload = postings.getPayload();
+
+ if (payload != null) {
+ assert vectorsHasPayload;
+ }
+
+ if (postingsHasPayload && vectorsHasPayload) {
+ assert postingsPostings != null;
+
+ if (payload == null) {
+ // we have payloads, but not at this position.
+ // postings has payloads too, so it should not have one at this position either
+ if (postingsPostings.getPayload() != null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.getPayload());
+ }
+ } else {
+ // we have payloads, and one at this position
+ // postings should also have one at this position, with the same bytes.
+ if (postingsPostings.getPayload() == null) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
+ }
+ BytesRef postingsPayload = postingsPostings.getPayload();
+ if (!payload.equals(postingsPayload)) {
+ throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
+ }
+ }
+ }
}
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/CoalescedDeletes.java Mon Aug 13 11:16:57 2012
@@ -24,7 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Query;
-import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.MergedIterator;
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
class CoalescedDeletes {
@@ -48,13 +48,14 @@ class CoalescedDeletes {
public Iterable<Term> termsIterable() {
return new Iterable<Term>() {
+ @SuppressWarnings("unchecked")
@Override
public Iterator<Term> iterator() {
- ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
- for (Iterable<Term> iterable : iterables) {
- subs.add(iterable.iterator());
+ Iterator<Term> subs[] = new Iterator[iterables.size()];
+ for (int i = 0; i < iterables.size(); i++) {
+ subs[i] = iterables.get(i).iterator();
}
- return mergedIterator(subs);
+ return new MergedIterator<Term>(subs);
}
};
}
@@ -86,106 +87,4 @@ class CoalescedDeletes {
}
};
}
-
- /** provides a merged view across multiple iterators */
- static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
- return new Iterator<Term>() {
- Term current;
- TermMergeQueue queue = new TermMergeQueue(iterators.size());
- SubIterator[] top = new SubIterator[iterators.size()];
- int numTop;
-
- {
- int index = 0;
- for (Iterator<Term> iterator : iterators) {
- if (iterator.hasNext()) {
- SubIterator sub = new SubIterator();
- sub.current = iterator.next();
- sub.iterator = iterator;
- sub.index = index++;
- queue.add(sub);
- }
- }
- }
-
- public boolean hasNext() {
- if (queue.size() > 0) {
- return true;
- }
-
- for (int i = 0; i < numTop; i++) {
- if (top[i].iterator.hasNext()) {
- return true;
- }
- }
- return false;
- }
-
- public Term next() {
- // restore queue
- pushTop();
-
- // gather equal top fields
- if (queue.size() > 0) {
- pullTop();
- } else {
- current = null;
- }
- return current;
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- private void pullTop() {
- // extract all subs from the queue that have the same top term
- assert numTop == 0;
- while (true) {
- top[numTop++] = queue.pop();
- if (queue.size() == 0
- || !(queue.top()).current.equals(top[0].current)) {
- break;
- }
- }
- current = top[0].current;
- }
-
- private void pushTop() {
- // call next() on each top, and put back into queue
- for (int i = 0; i < numTop; i++) {
- if (top[i].iterator.hasNext()) {
- top[i].current = top[i].iterator.next();
- queue.add(top[i]);
- } else {
- // no more terms
- top[i].current = null;
- }
- }
- numTop = 0;
- }
- };
- }
-
- private static class SubIterator {
- Iterator<Term> iterator;
- Term current;
- int index;
- }
-
- private static class TermMergeQueue extends PriorityQueue<SubIterator> {
- TermMergeQueue(int size) {
- super(size);
- }
-
- @Override
- protected boolean lessThan(SubIterator a, SubIterator b) {
- final int cmp = a.current.compareTo(b.current);
- if (cmp != 0) {
- return cmp < 0;
- } else {
- return a.index < b.index;
- }
- }
- }
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocValues.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocValues.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocValues.java Mon Aug 13 11:16:57 2012
@@ -105,7 +105,7 @@ public abstract class DocValues implemen
* <p>
* {@link Source} instances obtained from this method are closed / released
* from the cache once this {@link DocValues} instance is closed by the
- * {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
+ * {@link IndexReader}, {@link Fields} or the
* {@link DocValues} was created from.
*/
public Source getSource() throws IOException {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java Mon Aug 13 11:16:57 2012
@@ -48,11 +48,8 @@ public abstract class DocsAndPositionsEn
public abstract int endOffset() throws IOException;
/** Returns the payload at this position, or null if no
- * payload was indexed. Only call this once per
- * position. You should not modify anything (neither
- * members of the returned BytesRef nor bytes in the
- * byte[]). */
+ * payload was indexed. You should not modify anything
+ * (neither members of the returned BytesRef nor bytes
+ * in the byte[]). */
public abstract BytesRef getPayload() throws IOException;
-
- public abstract boolean hasPayload();
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/Fields.java Mon Aug 13 11:16:57 2012
@@ -18,15 +18,16 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.Iterator;
/** Flex API for access to fields and terms
* @lucene.experimental */
-public abstract class Fields {
+public abstract class Fields implements Iterable<String> {
/** Returns an iterator that will step through all fields
* names. This will not return null. */
- public abstract FieldsEnum iterator() throws IOException;
+ public abstract Iterator<String> iterator();
/** Get the {@link Terms} for this field. This will return
* null if the field does not exist. */
@@ -45,12 +46,7 @@ public abstract class Fields {
// TODO: deprecate?
public long getUniqueTermCount() throws IOException {
long numTerms = 0;
- FieldsEnum it = iterator();
- while(true) {
- String field = it.next();
- if (field == null) {
- break;
- }
+ for (String field : this) {
Terms terms = terms(field);
if (terms != null) {
final long termCount = terms.size();
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Mon Aug 13 11:16:57 2012
@@ -24,6 +24,7 @@ import org.apache.lucene.util.automaton.
import java.io.IOException;
import java.util.Comparator;
+import java.util.Iterator;
/** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -46,7 +47,7 @@ public class FilterAtomicReader extends
}
@Override
- public FieldsEnum iterator() throws IOException {
+ public Iterator<String> iterator() {
return in.iterator();
}
@@ -109,28 +110,20 @@ public class FilterAtomicReader extends
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
return in.intersect(automaton, bytes);
}
- }
-
- /** Base class for filtering {@link TermsEnum} implementations. */
- public static class FilterFieldsEnum extends FieldsEnum {
- protected final FieldsEnum in;
- public FilterFieldsEnum(FieldsEnum in) {
- this.in = in;
- }
@Override
- public String next() throws IOException {
- return in.next();
+ public boolean hasOffsets() {
+ return in.hasOffsets();
}
@Override
- public Terms terms() throws IOException {
- return in.terms();
+ public boolean hasPositions() {
+ return in.hasPositions();
}
@Override
- public AttributeSource attributes() {
- return in.attributes();
+ public boolean hasPayloads() {
+ return in.hasPayloads();
}
}
@@ -292,11 +285,6 @@ public class FilterAtomicReader extends
public BytesRef getPayload() throws IOException {
return in.getPayload();
}
-
- @Override
- public boolean hasPayload() {
- return in.hasPayload();
- }
@Override
public AttributeSource attributes() {
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Mon Aug 13 11:16:57 2012
@@ -173,7 +173,7 @@ final class FreqProxTermsWriterPerField
postings.lastDocCodes[termID] = docState.docID;
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
- postings.docFreqs[termID] = 1;
+ postings.termFreqs[termID] = 1;
if (hasProx) {
writeProx(termID, fieldState.position);
if (hasOffsets) {
@@ -194,10 +194,10 @@ final class FreqProxTermsWriterPerField
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
- assert !hasFreq || postings.docFreqs[termID] > 0;
+ assert !hasFreq || postings.termFreqs[termID] > 0;
if (!hasFreq) {
- assert postings.docFreqs == null;
+ assert postings.termFreqs == null;
if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@@ -212,13 +212,13 @@ final class FreqProxTermsWriterPerField
// Now that we know doc freq for previous doc,
// write it & lastDocCode
- if (1 == postings.docFreqs[termID]) {
+ if (1 == postings.termFreqs[termID]) {
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
} else {
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
- termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
+ termsHashPerField.writeVInt(0, postings.termFreqs[termID]);
}
- postings.docFreqs[termID] = 1;
+ postings.termFreqs[termID] = 1;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
@@ -233,7 +233,7 @@ final class FreqProxTermsWriterPerField
}
fieldState.uniqueTermCount++;
} else {
- fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
+ fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
if (hasProx) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
@@ -252,7 +252,7 @@ final class FreqProxTermsWriterPerField
public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
super(size);
if (writeFreqs) {
- docFreqs = new int[size];
+ termFreqs = new int[size];
}
lastDocIDs = new int[size];
lastDocCodes = new int[size];
@@ -267,7 +267,7 @@ final class FreqProxTermsWriterPerField
//System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
}
- int docFreqs[]; // # times this term occurs in the current doc
+ int termFreqs[]; // # times this term occurs in the current doc
int lastDocIDs[]; // Last docID where this term occurred
int lastDocCodes[]; // Code for prior doc
int lastPositions[]; // Last position where this term occurred
@@ -275,7 +275,7 @@ final class FreqProxTermsWriterPerField
@Override
ParallelPostingsArray newInstance(int size) {
- return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
+ return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null);
}
@Override
@@ -295,9 +295,9 @@ final class FreqProxTermsWriterPerField
assert to.lastOffsets != null;
System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
}
- if (docFreqs != null) {
- assert to.docFreqs != null;
- System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
+ if (termFreqs != null) {
+ assert to.termFreqs != null;
+ System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy);
}
}
@@ -310,7 +310,7 @@ final class FreqProxTermsWriterPerField
if (lastOffsets != null) {
bytes += RamUsageEstimator.NUM_BYTES_INT;
}
- if (docFreqs != null) {
+ if (termFreqs != null) {
bytes += RamUsageEstimator.NUM_BYTES_INT;
}
@@ -416,21 +416,21 @@ final class FreqProxTermsWriterPerField
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
- int numDocs = 0;
+ int docFreq = 0;
long totTF = 0;
int docID = 0;
while(true) {
//System.out.println(" cycle");
- final int termDocFreq;
+ final int termFreq;
if (freq.eof()) {
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
if (readTermFreq) {
- termDocFreq = postings.docFreqs[termID];
+ termFreq = postings.termFreqs[termID];
} else {
- termDocFreq = -1;
+ termFreq = -1;
}
postings.lastDocCodes[termID] = -1;
} else {
@@ -441,20 +441,20 @@ final class FreqProxTermsWriterPerField
final int code = freq.readVInt();
if (!readTermFreq) {
docID += code;
- termDocFreq = -1;
+ termFreq = -1;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
- termDocFreq = 1;
+ termFreq = 1;
} else {
- termDocFreq = freq.readVInt();
+ termFreq = freq.readVInt();
}
}
assert docID != postings.lastDocIDs[termID];
}
- numDocs++;
+ docFreq++;
assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
// NOTE: we could check here if the docID was
@@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField
// 2nd sweep does the real flush, but I suspect
// that'd add too much time to flush.
visitedDocs.set(docID);
- postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
+ postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
if (docID < delDocLimit) {
// Mark it deleted. TODO: we could also skip
// writing its postings; this would be
@@ -485,7 +485,7 @@ final class FreqProxTermsWriterPerField
}
}
- totTF += termDocFreq;
+ totTF += termFreq;
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
@@ -495,7 +495,7 @@ final class FreqProxTermsWriterPerField
// we did record positions (& maybe payload) and/or offsets
int position = 0;
int offset = 0;
- for(int j=0;j<termDocFreq;j++) {
+ for(int j=0;j<termFreq;j++) {
final BytesRef thisPayload;
if (readPositions) {
@@ -542,9 +542,9 @@ final class FreqProxTermsWriterPerField
}
postingsConsumer.finishDoc();
}
- termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
+ termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totTF : -1));
sumTotalTermFreq += totTF;
- sumDocFreq += numDocs;
+ sumDocFreq += docFreq;
}
termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Mon Aug 13 11:16:57 2012
@@ -2312,9 +2312,7 @@ public class IndexWriter implements Clos
}
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dir);
- final Set<String> dsFilesCopied = new HashSet<String>();
- final Map<String, String> dsNames = new HashMap<String, String>();
- final Set<String> copiedFiles = new HashSet<String>();
+
for (SegmentInfoPerCommit info : sis) {
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
@@ -2327,7 +2325,7 @@ public class IndexWriter implements Clos
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
- infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles));
+ infos.add(copySegmentAsIs(info, newSegName, context));
}
}
@@ -2463,25 +2461,9 @@ public class IndexWriter implements Clos
}
/** Copies the segment files as-is into the IndexWriter's directory. */
- // TODO: this can be substantially simplified now that 3.x support/shared docstores is removed!
- private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName,
- Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context,
- Set<String> copiedFiles)
+ private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, IOContext context)
throws IOException {
- // Determine if the doc store of this segment needs to be copied. It's
- // only relevant for segments that share doc store with others,
- // because the DS might have been copied already, in which case we
- // just want to update the DS name of this SegmentInfo.
- final String dsName = info.info.name;
- assert dsName != null;
- final String newDsName;
- if (dsNames.containsKey(dsName)) {
- newDsName = dsNames.get(dsName);
- } else {
- dsNames.put(dsName, segName);
- newDsName = segName;
- }
-
+
// note: we don't really need this fis (its copied), but we load it up
// so we don't pass a null value to the si writer
FieldInfos fis = getFieldInfos(info.info);
@@ -2496,7 +2478,7 @@ public class IndexWriter implements Clos
}
//System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
- // Same SI as before but we change directory, name and docStoreSegment:
+ // Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
info.info.getUseCompoundFile(),
info.info.getCodec(), info.info.getDiagnostics(), attributes);
@@ -2513,16 +2495,10 @@ public class IndexWriter implements Clos
}
newInfo.setFiles(segFiles);
- // We must rewrite the SI file because it references
- // segment name (its own name, if its 3.x, and doc
- // store segment name):
+ // We must rewrite the SI file because it references the segment name (in its list of files, etc.)
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
- try {
- newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
- } catch (UnsupportedOperationException uoe) {
- // OK: 3x codec cannot write a new SI file;
- // SegmentInfos will write this on commit
- }
+
+ newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
final Collection<String> siFiles = trackingDir.getCreatedFiles();
@@ -2537,8 +2513,7 @@ public class IndexWriter implements Clos
}
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
- assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once";
- copiedFiles.add(file);
+
info.info.dir.copy(directory, file, newFileName, context);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/IndexableFieldType.java Mon Aug 13 11:16:57 2012
@@ -42,6 +42,9 @@ public interface IndexableFieldType {
/** True if term vector positions should be indexed */
public boolean storeTermVectorPositions();
+
+ /** True if term vector payloads should be indexed */
+ public boolean storeTermVectorPayloads();
/** True if norms should not be indexed */
public boolean omitNorms();
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MergeState.java Mon Aug 13 11:16:57 2012
@@ -199,6 +199,7 @@ public class MergeState {
// and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
public PayloadProcessorProvider payloadProcessorProvider;
public ReaderPayloadProcessor[] readerPayloadProcessor;
+ public ReaderPayloadProcessor currentReaderPayloadProcessor;
public PayloadProcessor[] currentPayloadProcessor;
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java Mon Aug 13 11:16:57 2012
@@ -138,11 +138,6 @@ public final class MultiDocsAndPositions
}
@Override
- public boolean hasPayload() {
- return current.hasPayload();
- }
-
- @Override
public BytesRef getPayload() throws IOException {
return current.getPayload();
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiFields.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiFields.java Mon Aug 13 11:16:57 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Collection;
@@ -27,6 +28,7 @@ import java.util.concurrent.ConcurrentHa
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MergedIterator;
/**
* Exposes flex API, merged from flex API of sub-segments.
@@ -125,7 +127,7 @@ public final class MultiFields extends F
* term. This will return null if the field or term does
* not exist. */
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
- return getTermDocsEnum(r, liveDocs, field, term);
+ return getTermDocsEnum(r, liveDocs, field, term, DocsEnum.FLAG_FREQS);
}
/** Returns {@link DocsEnum} for the specified field &
@@ -180,22 +182,14 @@ public final class MultiFields extends F
this.subSlices = subSlices;
}
+ @SuppressWarnings("unchecked")
@Override
- public FieldsEnum iterator() throws IOException {
-
- final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
- final List<ReaderSlice> fieldsSlices = new ArrayList<ReaderSlice>();
+ public Iterator<String> iterator() {
+ Iterator<String> subIterators[] = new Iterator[subs.length];
for(int i=0;i<subs.length;i++) {
- fieldsEnums.add(subs[i].iterator());
- fieldsSlices.add(subSlices[i]);
- }
- if (fieldsEnums.size() == 0) {
- return FieldsEnum.EMPTY;
- } else {
- return new MultiFieldsEnum(this,
- fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
- fieldsSlices.toArray(ReaderSlice.EMPTY_ARRAY));
+ subIterators[i] = subs[i].iterator();
}
+ return new MergedIterator<String>(subIterators);
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Mon Aug 13 11:16:57 2012
@@ -37,12 +37,19 @@ public final class MultiTerms extends Te
private final Terms[] subs;
private final ReaderSlice[] subSlices;
private final Comparator<BytesRef> termComp;
+ private final boolean hasOffsets;
+ private final boolean hasPositions;
+ private final boolean hasPayloads;
public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
this.subs = subs;
this.subSlices = subSlices;
Comparator<BytesRef> _termComp = null;
+ assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
+ boolean _hasOffsets = true;
+ boolean _hasPositions = true;
+ boolean _hasPayloads = false;
for(int i=0;i<subs.length;i++) {
if (_termComp == null) {
_termComp = subs[i].getComparator();
@@ -54,9 +61,15 @@ public final class MultiTerms extends Te
throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
}
}
+ _hasOffsets &= subs[i].hasOffsets();
+ _hasPositions &= subs[i].hasPositions();
+ _hasPayloads |= subs[i].hasPayloads();
}
termComp = _termComp;
+ hasOffsets = _hasOffsets;
+ hasPositions = _hasPositions;
+ hasPayloads = hasPositions && _hasPayloads; // if all subs have pos, and at least one has payloads.
}
@Override
@@ -142,5 +155,20 @@ public final class MultiTerms extends Te
public Comparator<BytesRef> getComparator() {
return termComp;
}
+
+ @Override
+ public boolean hasOffsets() {
+ return hasOffsets;
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return hasPositions;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads;
+ }
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java Mon Aug 13 11:16:57 2012
@@ -27,6 +27,7 @@ import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.UnmodifiableIterator;
/** An {@link AtomicReader} which reads multiple, parallel indexes. Each index
@@ -121,12 +122,10 @@ public final class ParallelAtomicReader
for (final AtomicReader reader : this.parallelReaders) {
final Fields readerFields = reader.fields();
if (readerFields != null) {
- final FieldsEnum it = readerFields.iterator();
- String name;
- while ((name = it.next()) != null) {
+ for (String field : readerFields) {
// only add if the reader responsible for that field name is the current:
- if (fieldToReader.get(name) == reader) {
- this.fields.addField(name, it.terms());
+ if (fieldToReader.get(field) == reader) {
+ this.fields.addField(field, readerFields.terms(field));
}
}
}
@@ -151,33 +150,6 @@ public final class ParallelAtomicReader
return buffer.append(')').toString();
}
- private final class ParallelFieldsEnum extends FieldsEnum {
- private String currentField;
- private final Iterator<String> keys;
- private final ParallelFields fields;
-
- ParallelFieldsEnum(ParallelFields fields) {
- this.fields = fields;
- keys = fields.fields.keySet().iterator();
- }
-
- @Override
- public String next() {
- if (keys.hasNext()) {
- currentField = keys.next();
- } else {
- currentField = null;
- }
- return currentField;
- }
-
- @Override
- public Terms terms() {
- return fields.terms(currentField);
- }
-
- }
-
// Single instance of this, per ParallelReader instance
private final class ParallelFields extends Fields {
final Map<String,Terms> fields = new TreeMap<String,Terms>();
@@ -190,8 +162,8 @@ public final class ParallelAtomicReader
}
@Override
- public FieldsEnum iterator() {
- return new ParallelFieldsEnum(this);
+ public Iterator<String> iterator() {
+ return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Mon Aug 13 11:16:57 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
@@ -36,9 +37,12 @@ final class TermVectorsConsumerPerField
boolean doVectors;
boolean doVectorPositions;
boolean doVectorOffsets;
+ boolean doVectorPayloads;
int maxNumPostings;
OffsetAttribute offsetAttribute;
+ PayloadAttribute payloadAttribute;
+ boolean hasPayloads; // if enabled, and we actually saw any for this field
public TermVectorsConsumerPerField(TermsHashPerField termsHashPerField, TermVectorsConsumer termsWriter, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
@@ -58,13 +62,46 @@ final class TermVectorsConsumerPerField
doVectors = false;
doVectorPositions = false;
doVectorOffsets = false;
+ doVectorPayloads = false;
+ hasPayloads = false;
for(int i=0;i<count;i++) {
IndexableField field = fields[i];
- if (field.fieldType().indexed() && field.fieldType().storeTermVectors()) {
- doVectors = true;
- doVectorPositions |= field.fieldType().storeTermVectorPositions();
- doVectorOffsets |= field.fieldType().storeTermVectorOffsets();
+ if (field.fieldType().indexed()) {
+ if (field.fieldType().storeTermVectors()) {
+ doVectors = true;
+ doVectorPositions |= field.fieldType().storeTermVectorPositions();
+ doVectorOffsets |= field.fieldType().storeTermVectorOffsets();
+ if (doVectorPositions) { // flag is OR-ed across instances, so it may have been set by an earlier instance of this field
+ doVectorPayloads |= field.fieldType().storeTermVectorPayloads();
+ } else if (field.fieldType().storeTermVectorPayloads()) {
+ // TODO: move this check somewhere else, and impl the other missing ones
+ throw new IllegalArgumentException("cannot index term vector payloads without term vector positions (field=\"" + field.name() + "\")");
+ }
+ } else { // indexed, but term vectors are disabled: no term vector sub-option may be set
+ if (field.fieldType().storeTermVectorOffsets()) {
+ throw new IllegalArgumentException("cannot index term vector offsets when term vectors are not indexed (field=\"" + field.name() + "\")");
+ }
+ if (field.fieldType().storeTermVectorPositions()) {
+ throw new IllegalArgumentException("cannot index term vector positions when term vectors are not indexed (field=\"" + field.name() + "\")");
+ }
+ if (field.fieldType().storeTermVectorPayloads()) {
+ throw new IllegalArgumentException("cannot index term vector payloads when term vectors are not indexed (field=\"" + field.name() + "\")");
+ }
+ }
+ } else { // field is not indexed: no term vector option at all may be set
+ if (field.fieldType().storeTermVectors()) {
+ throw new IllegalArgumentException("cannot index term vectors when field is not indexed (field=\"" + field.name() + "\")");
+ }
+ if (field.fieldType().storeTermVectorOffsets()) {
+ throw new IllegalArgumentException("cannot index term vector offsets when field is not indexed (field=\"" + field.name() + "\")");
+ }
+ if (field.fieldType().storeTermVectorPositions()) {
+ throw new IllegalArgumentException("cannot index term vector positions when field is not indexed (field=\"" + field.name() + "\")");
+ }
+ if (field.fieldType().storeTermVectorPayloads()) {
+ throw new IllegalArgumentException("cannot index term vector payloads when field is not indexed (field=\"" + field.name() + "\")");
+ }
+ }
}
@@ -121,7 +158,7 @@ final class TermVectorsConsumerPerField
final int[] termIDs = termsHashPerField.sortPostings(tv.getComparator());
- tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets);
+ tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);
final ByteSliceReader posReader = doVectorPositions ? termsWriter.vectorSliceReaderPos : null;
final ByteSliceReader offReader = doVectorOffsets ? termsWriter.vectorSliceReaderOff : null;
@@ -165,52 +202,64 @@ final class TermVectorsConsumerPerField
} else {
offsetAttribute = null;
}
+ if (doVectorPayloads && fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
+ payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
+ } else {
+ payloadAttribute = null;
+ }
}
-
- @Override
- void newTerm(final int termID) {
- assert docState.testPoint("TermVectorsTermsWriterPerField.newTerm start");
- TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
-
- postings.freqs[termID] = 1;
-
+
+ void writeProx(TermVectorsPostingsArray postings, int termID) {
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
- termsHashPerField.writeVInt(1, startOffset);
+ termsHashPerField.writeVInt(1, startOffset - postings.lastOffsets[termID]);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
}
if (doVectorPositions) {
- termsHashPerField.writeVInt(0, fieldState.position);
+ final BytesRef payload;
+ if (payloadAttribute == null) {
+ payload = null;
+ } else {
+ payload = payloadAttribute.getPayload();
+ }
+
+ final int pos = fieldState.position - postings.lastPositions[termID];
+ if (payload != null && payload.length > 0) {
+ termsHashPerField.writeVInt(0, (pos<<1)|1);
+ termsHashPerField.writeVInt(0, payload.length);
+ termsHashPerField.writeBytes(0, payload.bytes, payload.offset, payload.length);
+ hasPayloads = true;
+ } else {
+ termsHashPerField.writeVInt(0, pos<<1);
+ }
postings.lastPositions[termID] = fieldState.position;
}
}
@Override
- void addTerm(final int termID) {
+ void newTerm(final int termID) {
+ assert docState.testPoint("TermVectorsTermsWriterPerField.newTerm start");
+ TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
- assert docState.testPoint("TermVectorsTermsWriterPerField.addTerm start");
+ postings.freqs[termID] = 1;
+ postings.lastOffsets[termID] = 0;
+ postings.lastPositions[termID] = 0;
+
+ writeProx(postings, termID);
+ }
+ @Override
+ void addTerm(final int termID) {
+ assert docState.testPoint("TermVectorsTermsWriterPerField.addTerm start");
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
postings.freqs[termID]++;
- if (doVectorOffsets) {
- int startOffset = fieldState.offset + offsetAttribute.startOffset();
- int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
- termsHashPerField.writeVInt(1, startOffset - postings.lastOffsets[termID]);
- termsHashPerField.writeVInt(1, endOffset - startOffset);
- postings.lastOffsets[termID] = endOffset;
- }
-
- if (doVectorPositions) {
- termsHashPerField.writeVInt(0, fieldState.position - postings.lastPositions[termID]);
- postings.lastPositions[termID] = fieldState.position;
- }
+ writeProx(postings, termID);
}
@Override