You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/09/19 22:57:11 UTC
svn commit: r1524840 [2/4] - in /lucene/dev/trunk/lucene: ./
codecs/src/java/org/apache/lucene/codecs/blockterms/
codecs/src/java/org/apache/lucene/codecs/bloom/
codecs/src/java/org/apache/lucene/codecs/memory/
codecs/src/java/org/apache/lucene/codecs/...
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.lucene4
*/
import java.io.IOException;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
@@ -511,11 +510,6 @@ class Lucene42DocValuesProducer extends
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
if (in.seekCeil(text) == null) {
return SeekStatus.END;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java Thu Sep 19 20:57:09 2013
@@ -26,7 +26,6 @@ import static org.apache.lucene.codecs.l
import java.io.Closeable; // javadocs
import java.io.IOException;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
@@ -811,11 +810,6 @@ public class Lucene45DocValuesProducer e
public long ord() throws IOException {
return currentOrd;
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
@Override
public int docFreq() throws IOException {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Thu Sep 19 20:57:09 2013
@@ -17,26 +17,29 @@ package org.apache.lucene.codecs.perfiel
* limitations under the License.
*/
-import java.io.Closeable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.ServiceLoader; // javadocs
+import java.util.Set;
import java.util.TreeMap;
+import java.util.TreeSet;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import static org.apache.lucene.index.FilterAtomicReader.FilterFields;
+
/**
* Enables per field postings support.
* <p>
@@ -65,96 +68,22 @@ public abstract class PerFieldPostingsFo
* segment suffix name for each field. */
public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix";
-
/** Sole constructor. */
public PerFieldPostingsFormat() {
super(PER_FIELD_NAME);
}
- @Override
- public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
- throws IOException {
- return new FieldsWriter(state);
- }
-
- static class FieldsConsumerAndSuffix implements Closeable {
- FieldsConsumer consumer;
+ /** Group of fields written by one PostingsFormat */
+ static class FieldsGroup {
+ final Set<String> fields = new TreeSet<String>();
int suffix;
-
- @Override
- public void close() throws IOException {
- consumer.close();
- }
- }
-
- private class FieldsWriter extends FieldsConsumer {
-
- private final Map<PostingsFormat,FieldsConsumerAndSuffix> formats = new HashMap<PostingsFormat,FieldsConsumerAndSuffix>();
- private final Map<String,Integer> suffixes = new HashMap<String,Integer>();
-
- private final SegmentWriteState segmentWriteState;
- public FieldsWriter(SegmentWriteState state) {
- segmentWriteState = state;
- }
+ /** Custom SegmentWriteState for this group of fields,
+ * with the segmentSuffix uniqueified for this
+ * PostingsFormat */
+ SegmentWriteState state;
+ };
- @Override
- public TermsConsumer addField(FieldInfo field) throws IOException {
- final PostingsFormat format = getPostingsFormatForField(field.name);
- if (format == null) {
- throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field.name + "\"");
- }
- final String formatName = format.getName();
-
- String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
- assert previousValue == null;
-
- Integer suffix;
-
- FieldsConsumerAndSuffix consumer = formats.get(format);
- if (consumer == null) {
- // First time we are seeing this format; create a new instance
-
- // bump the suffix
- suffix = suffixes.get(formatName);
- if (suffix == null) {
- suffix = 0;
- } else {
- suffix = suffix + 1;
- }
- suffixes.put(formatName, suffix);
-
- final String segmentSuffix = getFullSegmentSuffix(field.name,
- segmentWriteState.segmentSuffix,
- getSuffix(formatName, Integer.toString(suffix)));
- consumer = new FieldsConsumerAndSuffix();
- consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
- consumer.suffix = suffix;
- formats.put(format, consumer);
- } else {
- // we've already seen this format, so just grab its suffix
- assert suffixes.containsKey(formatName);
- suffix = consumer.suffix;
- }
-
- previousValue = field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));
- assert previousValue == null;
-
- // TODO: we should only provide the "slice" of FIS
- // that this PF actually sees ... then stuff like
- // .hasProx could work correctly?
- // NOTE: .hasProx is already broken in the same way for the non-perfield case,
- // if there is a fieldinfo with prox that has no postings, you get a 0 byte file.
- return consumer.consumer.addField(field);
- }
-
- @Override
- public void close() throws IOException {
- // Close all subs
- IOUtils.close(formats.values());
- }
- }
-
static String getSuffix(String formatName, String suffix) {
return formatName + "_" + suffix;
}
@@ -169,6 +98,87 @@ public abstract class PerFieldPostingsFo
throw new IllegalStateException("cannot embed PerFieldPostingsFormat inside itself (field \"" + fieldName + "\" returned PerFieldPostingsFormat)");
}
}
+
+  /**
+   * Consumer that splits the incoming fields into groups, one per
+   * PostingsFormat instance returned by getPostingsFormatForField,
+   * and lets each format write only the fields of its own group.
+   */
+  private class FieldsWriter extends FieldsConsumer {
+    final SegmentWriteState writeState;
+
+    public FieldsWriter(SegmentWriteState writeState) {
+      this.writeState = writeState;
+    }
+
+    @Override
+    public void write(Fields fields) throws IOException {
+
+      // PostingsFormat instance -> the group of fields it will write
+      final Map<PostingsFormat,FieldsGroup> groupsByFormat = new HashMap<PostingsFormat,FieldsGroup>();
+
+      // PostingsFormat name -> highest suffix handed out so far
+      final Map<String,Integer> lastSuffixByName = new HashMap<String,Integer>();
+
+      // Pass 1: decide which PostingsFormat owns each field
+      for (String fieldName : fields) {
+        final FieldInfo info = writeState.fieldInfos.fieldInfo(fieldName);
+
+        final PostingsFormat postingsFormat = getPostingsFormatForField(fieldName);
+        if (postingsFormat == null) {
+          throw new IllegalStateException("invalid null PostingsFormat for field=\"" + fieldName + "\"");
+        }
+        final String name = postingsFormat.getName();
+
+        FieldsGroup owner = groupsByFormat.get(postingsFormat);
+        if (owner != null) {
+          // This format instance already has a group, so a suffix
+          // must have been recorded under its name earlier
+          assert lastSuffixByName.containsKey(name);
+        } else {
+          // New format instance: take the next free suffix for
+          // this format name (0 for the first one)
+          final Integer previous = lastSuffixByName.get(name);
+          final int suffix = (previous == null) ? 0 : previous + 1;
+          lastSuffixByName.put(name, suffix);
+
+          final String segmentSuffix = getFullSegmentSuffix(fieldName,
+                                                            writeState.segmentSuffix,
+                                                            getSuffix(name, Integer.toString(suffix)));
+          owner = new FieldsGroup();
+          owner.state = new SegmentWriteState(writeState, segmentSuffix);
+          owner.suffix = suffix;
+          groupsByFormat.put(postingsFormat, owner);
+        }
+
+        owner.fields.add(fieldName);
+
+        // Record the choice on the FieldInfo; neither attribute is
+        // expected to be set already
+        String replaced = info.putAttribute(PER_FIELD_FORMAT_KEY, name);
+        assert replaced == null;
+
+        replaced = info.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(owner.suffix));
+        assert replaced == null;
+      }
+
+      // Pass 2: each format writes its own group
+      for (Map.Entry<PostingsFormat,FieldsGroup> entry : groupsByFormat.entrySet()) {
+        final PostingsFormat postingsFormat = entry.getKey();
+        final FieldsGroup group = entry.getValue();
+
+        // View of the input whose iterator yields only the field
+        // names belonging to this group
+        final Fields groupOnly = new FilterFields(fields) {
+          @Override
+          public Iterator<String> iterator() {
+            return group.fields.iterator();
+          }
+        };
+
+        postingsFormat.fieldsConsumer(group.state).write(groupOnly);
+      }
+    }
+  }
private class FieldsReader extends FieldsProducer {
@@ -239,6 +249,12 @@ public abstract class PerFieldPostingsFo
}
@Override
+ public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
+ throws IOException {
+ // All writing goes through FieldsWriter, which keeps the segment
+ // write state passed in here and uses it when write() is called.
+ return new FieldsWriter(state);
+ }
+
+ @Override
public final FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new FieldsReader(state);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@@ -65,7 +64,6 @@ class AutomatonTermsEnum extends Filtere
// of terms where we should simply do sequential reads instead.
private boolean linear = false;
private final BytesRef linearUpperBound = new BytesRef(10);
- private final Comparator<BytesRef> termComp;
/**
* Construct an enumerator based upon an automaton, enumerating the specified
@@ -85,8 +83,6 @@ class AutomatonTermsEnum extends Filtere
// used for path tracking, where each bit is a numbered state.
visited = new long[runAutomaton.getSize()];
-
- termComp = getComparator();
}
/**
@@ -99,10 +95,10 @@ class AutomatonTermsEnum extends Filtere
if (runAutomaton.run(term.bytes, term.offset, term.length))
return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
else
- return (linear && termComp.compare(term, linearUpperBound) < 0) ?
+ return (linear && term.compareTo(linearUpperBound) < 0) ?
AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
} else {
- return (linear && termComp.compare(term, linearUpperBound) < 0) ?
+ return (linear && term.compareTo(linearUpperBound) < 0) ?
AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Thu Sep 19 20:57:09 2013
@@ -761,8 +761,6 @@ public class CheckIndex {
BytesRef lastTerm = null;
- Comparator<BytesRef> termComp = terms.getComparator();
-
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
@@ -780,7 +778,7 @@ public class CheckIndex {
if (lastTerm == null) {
lastTerm = BytesRef.deepCopyOf(term);
} else {
- if (termComp.compare(lastTerm, term) >= 0) {
+ if (lastTerm.compareTo(term) >= 0) {
throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
}
lastTerm.copyBytes(term);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Thu Sep 19 20:57:09 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Comparator;
import java.util.List;
import org.apache.lucene.codecs.PostingsFormat; // javadocs
@@ -611,11 +610,6 @@ public class DocTermOrds {
termsEnum = reader.fields().terms(field).iterator(null);
}
- @Override
- public Comparator<BytesRef> getComparator() {
- return termsEnum.getComparator();
- }
-
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
return termsEnum.docs(liveDocs, reuse, flags);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import java.util.Iterator;
import org.apache.lucene.search.CachingWrapperFilter;
@@ -98,11 +97,6 @@ public class FilterAtomicReader extends
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return in.iterator(reuse);
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return in.getComparator();
- }
@Override
public long size() throws IOException {
@@ -200,11 +194,6 @@ public class FilterAtomicReader extends
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
return in.docsAndPositions(liveDocs, reuse, flags);
}
-
- @Override
- public Comparator<BytesRef> getComparator() {
- return in.getComparator();
- }
}
/** Base class for filtering {@link DocsEnum} implementations. */
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
@@ -28,7 +27,7 @@ import org.apache.lucene.util.Bits;
* Abstract class for enumerating a subset of all terms.
*
* <p>Term enumerations are always ordered by
- * {@link #getComparator}. Each term in the enumeration is
+ * {@link BytesRef#compareTo}. Each term in the enumeration is
* greater than all that precede it.</p>
* <p><em>Please note:</em> Consumers of this enum cannot
* call {@code seek()}, it is forward only; it throws
@@ -135,11 +134,6 @@ public abstract class FilteredTermsEnum
}
@Override
- public Comparator<BytesRef> getComparator() {
- return tenum.getComparator();
- }
-
- @Override
public int docFreq() throws IOException {
return tenum.docFreq();
}
@@ -221,7 +215,7 @@ public abstract class FilteredTermsEnum
final BytesRef t = nextSeekTerm(actualTerm);
//System.out.println(" seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
// Make sure we always seek forward:
- assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
+ assert actualTerm == null || t == null || t.compareTo(actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
if (t == null || tenum.seekCeil(t) == SeekStatus.END) {
// no more terms to seek to or enum exhausted
//System.out.println(" return null");
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java?rev=1524840&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java Thu Sep 19 20:57:09 2013
@@ -0,0 +1,523 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
+import org.apache.lucene.util.AttributeSource; // javadocs
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/** Implements limited (iterators only, no stats) {@link
+ * Fields} interface over the in-RAM buffered
+ * fields/terms/postings, to flush postings through the
+ * PostingsFormat. */
+
+class FreqProxFields extends Fields {
+ final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<String,FreqProxTermsWriterPerField>();
+
+  /**
+   * Registers every per-field writer under its field name. The
+   * backing map is a LinkedHashMap, so the order of fieldList is the
+   * order later returned by iterator().
+   */
+  public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) {
+    // NOTE: fields are already sorted by field name
+    final Iterator<FreqProxTermsWriterPerField> it = fieldList.iterator();
+    while (it.hasNext()) {
+      final FreqProxTermsWriterPerField perField = it.next();
+      fields.put(perField.fieldInfo.name, perField);
+    }
+  }
+
+  /**
+   * Returns the field names in the iteration order of the backing
+   * LinkedHashMap, i.e. the order in which the constructor added them.
+   */
+  @Override
+  public Iterator<String> iterator() {
+    return fields.keySet().iterator();
+  }
+
+  /**
+   * Returns a Terms view over the buffered postings of the given
+   * field, or null when no such field was registered.
+   */
+  @Override
+  public Terms terms(String field) throws IOException {
+    final FreqProxTermsWriterPerField perField = fields.get(field);
+    if (perField == null) {
+      // Unknown field: nothing buffered under this name
+      return null;
+    }
+    return new FreqProxTerms(perField);
+  }
+
+ @Override
+ public int size() {
+ // Deliberately unsupported: this Fields implementation is a limited
+ // view (iterators only, no stats), so the field count is not exposed.
+ //return fields.size();
+ throw new UnsupportedOperationException();
+ }
+
+  /**
+   * Terms view over one buffered field. Only iteration and the
+   * hasOffsets/hasPositions/hasPayloads flags are available; every
+   * statistic throws UnsupportedOperationException.
+   */
+  private static class FreqProxTerms extends Terms {
+    final FreqProxTermsWriterPerField terms;
+
+    public FreqProxTerms(FreqProxTermsWriterPerField terms) {
+      this.terms = terms;
+    }
+
+    /**
+     * Returns an enum that has been reset(); the passed-in enum is
+     * recycled only if it is a FreqProxTermsEnum over this same field.
+     */
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) {
+      FreqProxTermsEnum result = null;
+      if (reuse instanceof FreqProxTermsEnum) {
+        final FreqProxTermsEnum candidate = (FreqProxTermsEnum) reuse;
+        if (candidate.terms == this.terms) {
+          result = candidate;
+        }
+      }
+      if (result == null) {
+        result = new FreqProxTermsEnum(terms);
+      }
+      result.reset();
+      return result;
+    }
+
+    /** Not supported by this view. */
+    @Override
+    public long size() {
+      //return terms.termsHashPerField.bytesHash.size();
+      throw new UnsupportedOperationException();
+    }
+
+    /** Not supported by this view. */
+    @Override
+    public long getSumTotalTermFreq() {
+      //return terms.sumTotalTermFreq;
+      throw new UnsupportedOperationException();
+    }
+
+    /** Not supported by this view. */
+    @Override
+    public long getSumDocFreq() {
+      //return terms.sumDocFreq;
+      throw new UnsupportedOperationException();
+    }
+
+    /** Not supported by this view. */
+    @Override
+    public int getDocCount() {
+      //return terms.docCount;
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean hasOffsets() {
+      // NOTE: answer from the FieldInfo's current index options, not
+      // from what the in-memory buffer holds: the buffer may contain
+      // offsets because FieldInfo said so when indexing started, but
+      // the options may have been downgraded since:
+      final IndexOptions options = terms.fieldInfo.getIndexOptions();
+      return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      // NOTE: same reasoning as hasOffsets(): the buffer may contain
+      // positions that the (possibly downgraded) FieldInfo no longer
+      // advertises:
+      final IndexOptions options = terms.fieldInfo.getIndexOptions();
+      return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return terms.hasPayloads;
+    }
+  }
+
+  /**
+   * Walks the buffered terms of a single field in the order given by
+   * sortedTermIDs and hands out postings enums over the in-RAM
+   * postings. Per-term statistics (docFreq, totalTermFreq) are not
+   * supported.
+   */
+  private static class FreqProxTermsEnum extends TermsEnum {
+    final FreqProxTermsWriterPerField terms;
+    final int[] sortedTermIDs;
+    final FreqProxPostingsArray postingsArray;
+    // Reused holder for the bytes of the current term
+    final BytesRef scratch = new BytesRef();
+    final int numTerms;
+    // Index into sortedTermIDs of the current term; -1 right after reset()
+    int ord;
+
+    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
+      this.terms = terms;
+      this.numTerms = terms.termsHashPerField.bytesHash.size();
+      sortedTermIDs = terms.sortedTermIDs;
+      assert sortedTermIDs != null;
+      postingsArray = (FreqProxPostingsArray) terms.termsHashPerField.postingsArray;
+    }
+
+    /** Rewinds the enum so that the following next() returns the first term. */
+    public void reset() {
+      ord = -1;
+    }
+
+    /**
+     * Positions the enum on text, or on the smallest term greater than
+     * text when text itself is absent.
+     *
+     * @return FOUND on an exact match, NOT_FOUND when positioned on the
+     *         next greater term, END when no term is &gt;= text
+     */
+    @Override
+    public SeekStatus seekCeil(BytesRef text) {
+
+      // TODO: we could instead keep the BytesRefHash
+      // intact so this is a hash lookup
+
+      // binary search:
+      int lo = 0;
+      int hi = numTerms - 1;
+      while (hi >= lo) {
+        int mid = (lo + hi) >>> 1;
+        int textStart = postingsArray.textStarts[sortedTermIDs[mid]];
+        terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+        int cmp = scratch.compareTo(text);
+        if (cmp < 0) {
+          lo = mid + 1;
+        } else if (cmp > 0) {
+          hi = mid - 1;
+        } else {
+          // found; scratch already holds this term:
+          ord = mid;
+          return SeekStatus.FOUND;
+        }
+      }
+
+      // not found: every term below lo compared less than text and
+      // every term from lo on compared greater, so lo itself (not
+      // lo + 1) is the ceiling term, or numTerms if there is none:
+      ord = lo;
+      if (ord >= numTerms) {
+        return SeekStatus.END;
+      } else {
+        // scratch still holds the last probed term, which need not be
+        // the ceiling term; load the term we are actually positioned on:
+        int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
+        terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+        assert scratch.compareTo(text) > 0;
+        return SeekStatus.NOT_FOUND;
+      }
+    }
+
+    /** Positions the enum on the term with the given ordinal (no range check is done here). */
+    @Override
+    public void seekExact(long ord) {
+      this.ord = (int) ord;
+      int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]];
+      terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+    }
+
+    @Override
+    public BytesRef next() {
+      ord++;
+      if (ord >= numTerms) {
+        return null;
+      } else {
+        int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
+        terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+        return scratch;
+      }
+    }
+
+    /** Returns the shared scratch instance; its contents change on every move of the enum. */
+    @Override
+    public BytesRef term() {
+      return scratch;
+    }
+
+    @Override
+    public long ord() {
+      return ord;
+    }
+
+    @Override
+    public int docFreq() {
+      // We do not store this per-term, and we cannot
+      // implement this at merge time w/o an added pass
+      // through the postings:
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() {
+      // We do not store this per-term, and we cannot
+      // implement this at merge time w/o an added pass
+      // through the postings:
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns a docs enum for the current term.
+     *
+     * @throws IllegalArgumentException if liveDocs is non-null, or if
+     *         freqs are requested but were not indexed
+     */
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+
+      FreqProxDocsEnum docsEnum;
+
+      if (!terms.hasFreq && (flags & DocsEnum.FLAG_FREQS) != 0) {
+        // Caller wants freqs but we didn't index them;
+        // don't lie:
+        throw new IllegalArgumentException("did not index freq");
+      }
+
+      // Reuse only an enum that was built over the same postings array:
+      if (reuse instanceof FreqProxDocsEnum) {
+        docsEnum = (FreqProxDocsEnum) reuse;
+        if (docsEnum.postingsArray != postingsArray) {
+          docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+        }
+      } else {
+        docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+      }
+      docsEnum.reset(sortedTermIDs[ord]);
+      return docsEnum;
+    }
+
+    /**
+     * Returns a positions enum for the current term.
+     *
+     * @throws IllegalArgumentException if liveDocs is non-null, if
+     *         positions were not indexed, or if offsets are requested
+     *         but were not indexed
+     */
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      FreqProxDocsAndPositionsEnum posEnum;
+
+      if (!terms.hasProx) {
+        // Caller wants positions but we didn't index them;
+        // don't lie:
+        throw new IllegalArgumentException("did not index positions");
+      }
+
+      if (!terms.hasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0) {
+        // Caller wants offsets but we didn't index them;
+        // don't lie:
+        throw new IllegalArgumentException("did not index offsets");
+      }
+
+      // Reuse only an enum that was built over the same postings array:
+      if (reuse instanceof FreqProxDocsAndPositionsEnum) {
+        posEnum = (FreqProxDocsAndPositionsEnum) reuse;
+        if (posEnum.postingsArray != postingsArray) {
+          posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+        }
+      } else {
+        posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+      }
+      posEnum.reset(sortedTermIDs[ord]);
+      return posEnum;
+    }
+
+    /**
+     * Expert: Returns the TermsEnum's internal state to position the TermsEnum
+     * without re-seeking the term dictionary.
+     * <p>
+     * NOTE: A seek by {@link TermState} might not capture the
+     * {@link AttributeSource}'s state. Callers must maintain the
+     * {@link AttributeSource} states separately
+     * <p>
+     * The state returned here carries no information and its
+     * copyFrom always throws UnsupportedOperationException.
+     *
+     * @see TermState
+     * @see #seekExact(BytesRef, TermState)
+     */
+    @Override
+    public TermState termState() throws IOException {
+      return new TermState() {
+        @Override
+        public void copyFrom(TermState other) {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
+
+ /** Forward-only enum over the buffered docs (and freqs, when they
+ * were indexed) of one term; advance() and cost() are unsupported. */
+ private static class FreqProxDocsEnum extends DocsEnum {
+
+ final FreqProxTermsWriterPerField terms;
+ final FreqProxPostingsArray postingsArray;
+ final ByteSliceReader reader = new ByteSliceReader();
+ // True when the field indexed freqs (copied from terms.hasFreq)
+ final boolean readTermFreq;
+ int docID;
+ int freq;
+ // Set once the final doc (kept in postingsArray, not in the byte
+ // stream) has been returned
+ boolean ended;
+ int termID;
+
+ public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+ this.terms = terms;
+ this.postingsArray = postingsArray;
+ this.readTermFreq = terms.hasFreq;
+ }
+
+ /** Points this enum at the postings of the given termID and
+ * restarts iteration. */
+ public void reset(int termID) {
+ this.termID = termID;
+ // Stream 0 is read here; the positions enum additionally reads
+ // stream 1 (presumably 0 = doc/freq data -- confirm against
+ // TermsHashPerField.initReader)
+ terms.termsHashPerField.initReader(reader, termID, 0);
+ ended = false;
+ docID = 0;
+ }
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int freq() {
+ // Don't lie here ... don't want codecs writing lots
+ // of wasted 1s into the index:
+ if (!readTermFreq) {
+ throw new IllegalStateException("freq was not indexed");
+ } else {
+ return freq;
+ }
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (reader.eof()) {
+ // The byte stream is exhausted. The last doc of the term is not
+ // in the stream; it is taken from postingsArray exactly once,
+ // guarded by the ended flag.
+ if (ended) {
+ return NO_MORE_DOCS;
+ } else {
+ ended = true;
+ docID = postingsArray.lastDocIDs[termID];
+ if (readTermFreq) {
+ freq = postingsArray.termFreqs[termID];
+ }
+ }
+ } else {
+ int code = reader.readVInt();
+ if (!readTermFreq) {
+ // Without freqs the vInt is the plain doc delta
+ docID += code;
+ } else {
+ // With freqs the doc delta is in the upper bits; a set low bit
+ // means freq == 1, otherwise the freq follows as its own vInt
+ docID += code >>> 1;
+ if ((code & 1) != 0) {
+ freq = 1;
+ } else {
+ freq = reader.readVInt();
+ }
+ }
+
+ // A doc decoded from the stream must never be the last doc,
+ // which is only ever served from postingsArray above
+ assert docID != postingsArray.lastDocIDs[termID];
+ }
+
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /** Forward-only enum over the buffered docs, freqs, positions,
+ * payloads and (when indexed) offsets of one term; advance() and
+ * cost() are unsupported. */
+ private static class FreqProxDocsAndPositionsEnum extends DocsAndPositionsEnum {
+
+ final FreqProxTermsWriterPerField terms;
+ final FreqProxPostingsArray postingsArray;
+ // Reads stream 0 of the term (doc deltas and freqs, see nextDoc)
+ final ByteSliceReader reader = new ByteSliceReader();
+ // Reads stream 1 of the term (positions, payloads, offsets, see nextPosition)
+ final ByteSliceReader posReader = new ByteSliceReader();
+ // True when the field indexed offsets (copied from terms.hasOffsets)
+ final boolean readOffsets;
+ int docID;
+ int freq;
+ int pos;
+ int startOffset;
+ int endOffset;
+ // Number of positions of the current doc not yet consumed
+ int posLeft;
+ int termID;
+ // Set once the final doc (kept in postingsArray, not in the byte
+ // stream) has been returned
+ boolean ended;
+ // Whether the position most recently read carried a payload
+ boolean hasPayload;
+ // Reused buffer; only meaningful while hasPayload is true
+ BytesRef payload = new BytesRef();
+
+ public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+ this.terms = terms;
+ this.postingsArray = postingsArray;
+ this.readOffsets = terms.hasOffsets;
+ assert terms.hasProx;
+ assert terms.hasFreq;
+ }
+
+ /** Points this enum at the postings of the given termID and
+ * restarts iteration on both streams. */
+ public void reset(int termID) {
+ this.termID = termID;
+ terms.termsHashPerField.initReader(reader, termID, 0);
+ terms.termsHashPerField.initReader(posReader, termID, 1);
+ ended = false;
+ docID = 0;
+ posLeft = 0;
+ }
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int freq() {
+ return freq;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ // Consume any positions of the current doc the caller skipped, so
+ // posReader is lined up with the first position of the next doc
+ while (posLeft != 0) {
+ nextPosition();
+ }
+
+ if (reader.eof()) {
+ // The byte stream is exhausted. The last doc of the term is not
+ // in the stream; it is taken from postingsArray exactly once,
+ // guarded by the ended flag.
+ if (ended) {
+ return NO_MORE_DOCS;
+ } else {
+ ended = true;
+ docID = postingsArray.lastDocIDs[termID];
+ freq = postingsArray.termFreqs[termID];
+ }
+ } else {
+ // Doc delta is in the upper bits; a set low bit means freq == 1,
+ // otherwise the freq follows as its own vInt
+ int code = reader.readVInt();
+ docID += code >>> 1;
+ if ((code & 1) != 0) {
+ freq = 1;
+ } else {
+ freq = reader.readVInt();
+ }
+
+ // A doc decoded from the stream must never be the last doc,
+ // which is only ever served from postingsArray above
+ assert docID != postingsArray.lastDocIDs[termID];
+ }
+
+ // Positions and start offsets are delta-coded per doc, so both
+ // accumulators restart at 0 for every new doc
+ posLeft = freq;
+ pos = 0;
+ startOffset = 0;
+ return docID;
+ }
+
+ @Override
+ public int advance(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int nextPosition() throws IOException {
+ assert posLeft > 0;
+ posLeft--;
+ // Position delta is in the upper bits; a set low bit means a
+ // payload (length vInt, then the bytes) follows
+ int code = posReader.readVInt();
+ pos += code >>> 1;
+ if ((code & 1) != 0) {
+ hasPayload = true;
+ // has a payload
+ payload.length = posReader.readVInt();
+ if (payload.bytes.length < payload.length) {
+ payload.grow(payload.length);
+ }
+ posReader.readBytes(payload.bytes, 0, payload.length);
+ } else {
+ hasPayload = false;
+ }
+
+ if (readOffsets) {
+ // Start offset is a delta from the previous start offset in this
+ // doc; the second vInt is added to the start to get the end
+ startOffset += posReader.readVInt();
+ endOffset = startOffset + posReader.readVInt();
+ }
+
+ return pos;
+ }
+
+ @Override
+ public int startOffset() {
+ if (!readOffsets) {
+ throw new IllegalStateException("offsets were not indexed");
+ }
+ return startOffset;
+ }
+
+ @Override
+ public int endOffset() {
+ if (!readOffsets) {
+ throw new IllegalStateException("offsets were not indexed");
+ }
+ return endOffset;
+ }
+
+ /** Returns the payload of the position most recently read, or
+ * null when that position had none. The returned BytesRef is the
+ * shared buffer and is overwritten by later positions. */
+ @Override
+ public BytesRef getPayload() {
+ if (hasPayload) {
+ return payload;
+ } else {
+ return null;
+ }
+ }
+ }
+}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Thu Sep 19 20:57:09 2013
@@ -19,19 +19,62 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
-import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.util.IOUtils;
final class FreqProxTermsWriter extends TermsHashConsumer {
@Override
void abort() {}
+ /**
+  * Applies the buffered delete-by-term operations in {@code state.segDeletes}
+  * to the postings of the segment being flushed.
+  *
+  * <p>For each buffered delete term that exists in {@code fields}, every
+  * document containing the term whose docID is below the limit recorded for
+  * that term is cleared in {@code state.liveDocs} (allocated lazily on the
+  * first delete), and {@code state.delCountOnFlush} is incremented once per
+  * newly cleared document.
+  *
+  * @param state flush state holding the pending deletes and receiving the live docs
+  * @param fields postings of the segment being flushed
+  * @throws IOException if reading the postings fails
+  */
+ private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
+   // Process any pending Term deletes for this newly
+   // flushed segment:
+   if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+     Map<Term,Integer> segDeletes = state.segDeletes.terms;
+     List<Term> deleteTerms = new ArrayList<Term>(segDeletes.keySet());
+     // Sort so that delete terms of the same field are visited consecutively
+     // and one TermsEnum can serve all of them.
+     Collections.sort(deleteTerms);
+     String lastField = null;
+     TermsEnum termsEnum = null;
+     DocsEnum docsEnum = null;
+     for(Term deleteTerm : deleteTerms) {
+       if (deleteTerm.field().equals(lastField) == false) {
+         lastField = deleteTerm.field();
+         Terms terms = fields.terms(lastField);
+         if (terms != null) {
+           termsEnum = terms.iterator(termsEnum);
+         } else {
+           // This segment has no postings for the field. Drop the enum left
+           // over from the previous field; otherwise the seek below would
+           // run against the wrong field and could delete unrelated docs.
+           termsEnum = null;
+         }
+       }
+
+       if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
+         docsEnum = termsEnum.docs(null, docsEnum, 0);
+         // Only docs with an ID below this limit are affected by the delete.
+         int delDocLimit = segDeletes.get(deleteTerm);
+         while (true) {
+           int doc = docsEnum.nextDoc();
+           if (doc == DocsEnum.NO_MORE_DOCS) {
+             break;
+           }
+           if (doc < delDocLimit) {
+             if (state.liveDocs == null) {
+               state.liveDocs = state.segmentInfo.getCodec().liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
+             }
+             // Count each doc once, even if several delete terms hit it.
+             if (state.liveDocs.get(doc)) {
+               state.delCountOnFlush++;
+               state.liveDocs.clear(doc);
+             }
+           } else {
+             // docIDs are returned in increasing order, so no later doc
+             // can fall below the limit.
+             break;
+           }
+         }
+       }
+     }
+   }
+ }
+
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
@@ -47,63 +90,20 @@ final class FreqProxTermsWriter extends
for (TermsHashConsumerPerField f : fieldsToFlush.values()) {
final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
if (perField.termsHashPerField.bytesHash.size() > 0) {
+ perField.sortPostings();
+ assert perField.fieldInfo.isIndexed();
allFields.add(perField);
}
}
- final int numAllFields = allFields.size();
-
// Sort by field name
CollectionUtil.introSort(allFields);
- final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
+ Fields fields = new FreqProxFields(allFields);
- boolean success = false;
+ applyDeletes(state, fields);
- try {
- TermsHash termsHash = null;
-
- /*
- Current writer chain:
- FieldsConsumer
- -> IMPL: FormatPostingsTermsDictWriter
- -> TermsConsumer
- -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
- -> DocsConsumer
- -> IMPL: FormatPostingsDocsWriter
- -> PositionsConsumer
- -> IMPL: FormatPostingsPositionsWriter
- */
-
- for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
- final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
-
- final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
-
- // If this field has postings then add them to the
- // segment
- fieldWriter.flush(fieldInfo.name, consumer, state);
-
- TermsHashPerField perField = fieldWriter.termsHashPerField;
- assert termsHash == null || termsHash == perField.termsHash;
- termsHash = perField.termsHash;
- int numPostings = perField.bytesHash.size();
- perField.reset();
- perField.shrinkHash(numPostings);
- fieldWriter.reset();
- }
-
- if (termsHash != null) {
- termsHash.reset();
- }
- success = true;
- } finally {
- if (success) {
- IOUtils.close(consumer);
- } else {
- IOUtils.closeWhileHandlingException(consumer);
- }
- }
+ state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state).write(fields);
}
BytesRef payload;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Thu Sep 19 20:57:09 2013
@@ -17,19 +17,10 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Map;
-
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
// TODO: break into separate freq and prox writers as
@@ -42,11 +33,16 @@ final class FreqProxTermsWriterPerField
final FieldInfo fieldInfo;
final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
- private boolean hasFreq;
- private boolean hasProx;
- private boolean hasOffsets;
+ boolean hasFreq;
+ boolean hasProx;
+ boolean hasOffsets;
PayloadAttribute payloadAttribute;
OffsetAttribute offsetAttribute;
+ long sumTotalTermFreq;
+ long sumDocFreq;
+
+ // How many docs have this field:
+ int docCount;
public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
@@ -68,6 +64,12 @@ final class FreqProxTermsWriterPerField
@Override
void finish() {
+ sumDocFreq += fieldState.uniqueTermCount;
+ sumTotalTermFreq += fieldState.length;
+ if (fieldState.length > 0) {
+ docCount++;
+ }
+
if (hasPayloads) {
fieldInfo.setStorePayloads();
}
@@ -83,14 +85,6 @@ final class FreqProxTermsWriterPerField
return fieldInfo.name.compareTo(other.fieldInfo.name);
}
- // Called after flush
- void reset() {
- // Record, up front, whether our in-RAM format will be
- // with or without term freqs:
- setIndexOptions(fieldInfo.getIndexOptions());
- payloadAttribute = null;
- }
-
private void setIndexOptions(IndexOptions indexOptions) {
if (indexOptions == null) {
// field could later be updated with indexed=true, so set everything on
@@ -318,233 +312,10 @@ final class FreqProxTermsWriterPerField
BytesRef payload;
- /* Walk through all unique text tokens (Posting
- * instances) found in this field and serialize them
- * into a single RAM segment. */
- void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state)
- throws IOException {
-
- if (!fieldInfo.isIndexed()) {
- return; // nothing to flush, don't bother the codec with the unindexed field
- }
-
- final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
- final Comparator<BytesRef> termComp = termsConsumer.getComparator();
-
- // CONFUSING: this.indexOptions holds the index options
- // that were current when we first saw this field. But
- // it's possible this has changed, eg when other
- // documents are indexed that cause a "downgrade" of the
- // IndexOptions. So we must decode the in-RAM buffer
- // according to this.indexOptions, but then write the
- // new segment to the directory according to
- // currentFieldIndexOptions:
- final IndexOptions currentFieldIndexOptions = fieldInfo.getIndexOptions();
- assert currentFieldIndexOptions != null;
-
- final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
-
- final boolean readTermFreq = this.hasFreq;
- final boolean readPositions = this.hasProx;
- final boolean readOffsets = this.hasOffsets;
-
- //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
-
- // Make sure FieldInfo.update is working correctly!:
- assert !writeTermFreq || readTermFreq;
- assert !writePositions || readPositions;
- assert !writeOffsets || readOffsets;
-
- assert !writeOffsets || writePositions;
-
- final Map<Term,Integer> segDeletes;
- if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
- segDeletes = state.segDeletes.terms;
- } else {
- segDeletes = null;
- }
-
- final int[] termIDs = termsHashPerField.sortPostings(termComp);
- final int numTerms = termsHashPerField.bytesHash.size();
- final BytesRef text = new BytesRef();
- final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
- final ByteSliceReader freq = new ByteSliceReader();
- final ByteSliceReader prox = new ByteSliceReader();
-
- FixedBitSet visitedDocs = new FixedBitSet(state.segmentInfo.getDocCount());
- long sumTotalTermFreq = 0;
- long sumDocFreq = 0;
-
- Term protoTerm = new Term(fieldName);
- for (int i = 0; i < numTerms; i++) {
- final int termID = termIDs[i];
- //System.out.println("term=" + termID);
- // Get BytesRef
- final int textStart = postings.textStarts[termID];
- termsHashPerField.bytePool.setBytesRef(text, textStart);
-
- termsHashPerField.initReader(freq, termID, 0);
- if (readPositions || readOffsets) {
- termsHashPerField.initReader(prox, termID, 1);
- }
-
- // TODO: really TermsHashPerField should take over most
- // of this loop, including merge sort of terms from
- // multiple threads and interacting with the
- // TermsConsumer, only calling out to us (passing us the
- // DocsConsumer) to handle delivery of docs/positions
-
- final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
-
- final int delDocLimit;
- if (segDeletes != null) {
- protoTerm.bytes = text;
- final Integer docIDUpto = segDeletes.get(protoTerm);
- if (docIDUpto != null) {
- delDocLimit = docIDUpto;
- } else {
- delDocLimit = 0;
- }
- } else {
- delDocLimit = 0;
- }
-
- // Now termStates has numToMerge FieldMergeStates
- // which all share the same term. Now we must
- // interleave the docID streams.
- int docFreq = 0;
- long totalTermFreq = 0;
- int docID = 0;
-
- while(true) {
- //System.out.println(" cycle");
- final int termFreq;
- if (freq.eof()) {
- if (postings.lastDocCodes[termID] != -1) {
- // Return last doc
- docID = postings.lastDocIDs[termID];
- if (readTermFreq) {
- termFreq = postings.termFreqs[termID];
- } else {
- termFreq = -1;
- }
- postings.lastDocCodes[termID] = -1;
- } else {
- // EOF
- break;
- }
- } else {
- final int code = freq.readVInt();
- if (!readTermFreq) {
- docID += code;
- termFreq = -1;
- } else {
- docID += code >>> 1;
- if ((code & 1) != 0) {
- termFreq = 1;
- } else {
- termFreq = freq.readVInt();
- }
- }
-
- assert docID != postings.lastDocIDs[termID];
- }
-
- docFreq++;
- assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
-
- // NOTE: we could check here if the docID was
- // deleted, and skip it. However, this is somewhat
- // dangerous because it can yield non-deterministic
- // behavior since we may see the docID before we see
- // the term that caused it to be deleted. This
- // would mean some (but not all) of its postings may
- // make it into the index, which'd alter the docFreq
- // for those terms. We could fix this by doing two
- // passes, ie first sweep marks all del docs, and
- // 2nd sweep does the real flush, but I suspect
- // that'd add too much time to flush.
- visitedDocs.set(docID);
- postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
- if (docID < delDocLimit) {
- // Mark it deleted. TODO: we could also skip
- // writing its postings; this would be
- // deterministic (just for this Term's docs).
-
- // TODO: can we do this reach-around in a cleaner way????
- if (state.liveDocs == null) {
- state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
- }
- if (state.liveDocs.get(docID)) {
- state.delCountOnFlush++;
- state.liveDocs.clear(docID);
- }
- }
-
- totalTermFreq += termFreq;
-
- // Carefully copy over the prox + payload info,
- // changing the format to match Lucene's segment
- // format.
-
- if (readPositions || readOffsets) {
- // we did record positions (& maybe payload) and/or offsets
- int position = 0;
- int offset = 0;
- for(int j=0;j<termFreq;j++) {
- final BytesRef thisPayload;
-
- if (readPositions) {
- final int code = prox.readVInt();
- position += code >>> 1;
-
- if ((code & 1) != 0) {
-
- // This position has a payload
- final int payloadLength = prox.readVInt();
-
- if (payload == null) {
- payload = new BytesRef();
- payload.bytes = new byte[payloadLength];
- } else if (payload.bytes.length < payloadLength) {
- payload.grow(payloadLength);
- }
-
- prox.readBytes(payload.bytes, 0, payloadLength);
- payload.length = payloadLength;
- thisPayload = payload;
-
- } else {
- thisPayload = null;
- }
-
- if (readOffsets) {
- final int startOffset = offset + prox.readVInt();
- final int endOffset = startOffset + prox.readVInt();
- if (writePositions) {
- if (writeOffsets) {
- assert startOffset >=0 && endOffset >= startOffset : "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset;
- postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
- } else {
- postingsConsumer.addPosition(position, thisPayload, -1, -1);
- }
- }
- offset = startOffset;
- } else if (writePositions) {
- postingsConsumer.addPosition(position, thisPayload, -1, -1);
- }
- }
- }
- }
- postingsConsumer.finishDoc();
- }
- termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
- sumTotalTermFreq += totalTermFreq;
- sumDocFreq += docFreq;
- }
+ int[] sortedTermIDs;
- termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
+ /** Sorts this field's postings via {@code termsHashPerField} and keeps the
+  * resulting term ID order in {@code sortedTermIDs}. Asserts that the
+  * postings have not been sorted already. */
+ void sortPostings() {
+ assert sortedTermIDs == null;
+ sortedTermIDs = termsHashPerField.sortPostings();
}
}
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java?rev=1524840&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java Thu Sep 19 20:57:09 2013
@@ -0,0 +1,136 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.Bits;
+
+import static org.apache.lucene.index.FilterAtomicReader.FilterFields;
+import static org.apache.lucene.index.FilterAtomicReader.FilterTerms;
+import static org.apache.lucene.index.FilterAtomicReader.FilterTermsEnum;
+
+/**
+ * A {@link Fields} implementation that merges multiple Fields into one, and
+ * maps around deleted documents. This is used for merging.
+ *
+ * <p>The {@link Terms} and {@link TermsEnum} instances handed out wrap their
+ * {@link MultiFields} counterparts; the postings enums they return are
+ * {@link MappingMultiDocsEnum} / {@link MappingMultiDocsAndPositionsEnum}
+ * instances created with the {@link MergeState}.
+ */
+class MappedMultiFields extends FilterFields {
+  final MergeState mergeState;
+
+  /** Wraps {@code multiFields} and keeps {@code mergeState} for the enums created later. */
+  public MappedMultiFields(MergeState mergeState, MultiFields multiFields) {
+    super(multiFields);
+    this.mergeState = mergeState;
+  }
+
+  /** Returns a mapped view of the field's terms, or {@code null} if the delegate has none. */
+  @Override
+  public Terms terms(String field) throws IOException {
+    final MultiTerms multiTerms = (MultiTerms) in.terms(field);
+    return multiTerms == null ? null : new MappedMultiTerms(mergeState, multiTerms);
+  }
+
+  /** Terms wrapper whose iterator produces {@link MappedMultiTermsEnum}s; statistics are unsupported. */
+  private static class MappedMultiTerms extends FilterTerms {
+    final MergeState mergeState;
+
+    public MappedMultiTerms(MergeState mergeState, MultiTerms multiTerms) {
+      super(multiTerms);
+      this.mergeState = mergeState;
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) throws IOException {
+      final MultiTermsEnum merged = (MultiTermsEnum) in.iterator(reuse);
+      return new MappedMultiTermsEnum(mergeState, merged);
+    }
+
+    // The statistics below are not available from this view.
+
+    @Override
+    public long size() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumTotalTermFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumDocFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int getDocCount() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /** TermsEnum wrapper that returns mapping postings enums; per-term statistics are unsupported. */
+  private static class MappedMultiTermsEnum extends FilterTermsEnum {
+    final MergeState mergeState;
+
+    public MappedMultiTermsEnum(MergeState mergeState, MultiTermsEnum multiTermsEnum) {
+      super(multiTermsEnum);
+      this.mergeState = mergeState;
+    }
+
+    @Override
+    public int docFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Returns a {@link MappingMultiDocsEnum} over the current term's postings.
+     *
+     * @throws IllegalArgumentException if {@code liveDocs} is not {@code null}
+     */
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      final MappingMultiDocsEnum mapped = (reuse instanceof MappingMultiDocsEnum)
+          ? (MappingMultiDocsEnum) reuse
+          : new MappingMultiDocsEnum(mergeState);
+
+      // Offer the MultiDocsEnum wrapped last time (if any) to the delegate for reuse.
+      final MultiDocsEnum merged = (MultiDocsEnum) in.docs(liveDocs, mapped.multiDocsEnum, flags);
+      return mapped.reset(merged);
+    }
+
+    /**
+     * Returns a {@link MappingMultiDocsAndPositionsEnum} over the current term's postings.
+     *
+     * @throws IllegalArgumentException if {@code liveDocs} is not {@code null}
+     */
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      final MappingMultiDocsAndPositionsEnum mapped = (reuse instanceof MappingMultiDocsAndPositionsEnum)
+          ? (MappingMultiDocsAndPositionsEnum) reuse
+          : new MappingMultiDocsAndPositionsEnum(mergeState);
+
+      // Offer the MultiDocsAndPositionsEnum wrapped last time (if any) to the delegate for reuse.
+      final MultiDocsAndPositionsEnum merged =
+          (MultiDocsAndPositionsEnum) in.docsAndPositions(liveDocs, mapped.multiDocsAndPositionsEnum, flags);
+      return mapped.reset(merged);
+    }
+  }
+}
Copied: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java (from r1522894, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java?p2=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java&r1=1522894&r2=1524840&rev=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java Thu Sep 19 20:57:09 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -18,9 +18,6 @@ package org.apache.lucene.codecs;
*/
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MultiDocsAndPositionsEnum;
import org.apache.lucene.index.MultiDocsAndPositionsEnum.EnumWithSlice;
import java.io.IOException;
@@ -32,7 +29,7 @@ import java.io.IOException;
* @lucene.experimental
*/
-public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
+final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
private MultiDocsAndPositionsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
@@ -41,9 +38,11 @@ public final class MappingMultiDocsAndPo
int currentBase;
int doc = -1;
private MergeState mergeState;
+ MultiDocsAndPositionsEnum multiDocsAndPositionsEnum;
/** Sole constructor. */
- public MappingMultiDocsAndPositionsEnum() {
+ public MappingMultiDocsAndPositionsEnum(MergeState mergeState) {
+ this.mergeState = mergeState;
}
MappingMultiDocsAndPositionsEnum reset(MultiDocsAndPositionsEnum postingsEnum) {
@@ -51,15 +50,10 @@ public final class MappingMultiDocsAndPo
this.subs = postingsEnum.getSubs();
upto = -1;
current = null;
+ this.multiDocsAndPositionsEnum = postingsEnum;
return this;
}
- /** Sets the {@link MergeState}, which is used to re-map
- * document IDs. */
- public void setMergeState(MergeState mergeState) {
- this.mergeState = mergeState;
- }
-
/** How many sub-readers we are merging.
* @see #getSubs */
public int getNumSubs() {
@@ -103,6 +97,13 @@ public final class MappingMultiDocsAndPo
int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
+
+ mergeState.checkAbortCount++;
+ if (mergeState.checkAbortCount > 60000) {
+ mergeState.checkAbort.work(mergeState.checkAbortCount/5.0);
+ mergeState.checkAbortCount = 0;
+ }
+
// compact deletions
doc = currentMap.get(doc);
if (doc == -1) {
Copied: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java (from r1522894, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java?p2=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java&r1=1522894&r2=1524840&rev=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java Thu Sep 19 20:57:09 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,9 +17,6 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MultiDocsEnum;
import org.apache.lucene.index.MultiDocsEnum.EnumWithSlice;
import java.io.IOException;
@@ -31,7 +28,7 @@ import java.io.IOException;
* @lucene.experimental
*/
-public final class MappingMultiDocsEnum extends DocsEnum {
+final class MappingMultiDocsEnum extends DocsEnum {
private MultiDocsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
@@ -39,26 +36,23 @@ public final class MappingMultiDocsEnum
DocsEnum current;
int currentBase;
int doc = -1;
- private MergeState mergeState;
+ private final MergeState mergeState;
+ MultiDocsEnum multiDocsEnum;
/** Sole constructor. */
- public MappingMultiDocsEnum() {
+ public MappingMultiDocsEnum(MergeState mergeState) {
+ this.mergeState = mergeState;
}
MappingMultiDocsEnum reset(MultiDocsEnum docsEnum) {
this.numSubs = docsEnum.getNumSubs();
this.subs = docsEnum.getSubs();
+ this.multiDocsEnum = docsEnum;
upto = -1;
current = null;
return this;
}
- /** Sets the {@link MergeState}, which is used to re-map
- * document IDs. */
- public void setMergeState(MergeState mergeState) {
- this.mergeState = mergeState;
- }
-
/** How many sub-readers we are merging.
* @see #getSubs */
public int getNumSubs() {
@@ -103,6 +97,13 @@ public final class MappingMultiDocsEnum
int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
+
+ mergeState.checkAbortCount++;
+ if (mergeState.checkAbortCount > 60000) {
+ mergeState.checkAbort.work(mergeState.checkAbortCount/5.0);
+ mergeState.checkAbortCount = 0;
+ }
+
// compact deletions
doc = currentMap.get(doc);
if (doc == -1) {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java Thu Sep 19 20:57:09 2013
@@ -151,6 +151,10 @@ public class MergeState {
/** InfoStream for debugging messages. */
public final InfoStream infoStream;
+ /** Counter used for periodic calls to checkAbort
+ * @lucene.internal */
+ public int checkAbortCount;
+
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
// but is this really so expensive to compute again in different components, versus once in SM?
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Thu Sep 19 20:57:09 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.BytesRef;
@@ -36,7 +35,6 @@ import org.apache.lucene.util.automaton.
public final class MultiTerms extends Terms {
private final Terms[] subs;
private final ReaderSlice[] subSlices;
- private final Comparator<BytesRef> termComp;
private final boolean hasOffsets;
private final boolean hasPositions;
private final boolean hasPayloads;
@@ -51,28 +49,16 @@ public final class MultiTerms extends Te
this.subs = subs;
this.subSlices = subSlices;
- Comparator<BytesRef> _termComp = null;
assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
boolean _hasOffsets = true;
boolean _hasPositions = true;
boolean _hasPayloads = false;
for(int i=0;i<subs.length;i++) {
- if (_termComp == null) {
- _termComp = subs[i].getComparator();
- } else {
- // We cannot merge sub-readers that have
- // different TermComps
- final Comparator<BytesRef> subTermComp = subs[i].getComparator();
- if (subTermComp != null && !subTermComp.equals(_termComp)) {
- throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
- }
- }
_hasOffsets &= subs[i].hasOffsets();
_hasPositions &= subs[i].hasPositions();
_hasPayloads |= subs[i].hasPayloads();
}
- termComp = _termComp;
hasOffsets = _hasOffsets;
hasPositions = _hasPositions;
hasPayloads = hasPositions && _hasPayloads; // if all subs have pos, and at least one has payloads.
@@ -158,11 +144,6 @@ public final class MultiTerms extends Te
}
@Override
- public Comparator<BytesRef> getComparator() {
- return termComp;
- }
-
- @Override
public boolean hasOffsets() {
return hasOffsets;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -23,7 +23,6 @@ import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.util.Arrays;
-import java.util.Comparator;
/**
* Exposes {@link TermsEnum} API, merged from {@link TermsEnum} API of sub-segments.
@@ -47,7 +46,6 @@ public final class MultiTermsEnum extend
private int numTop;
private int numSubs;
private BytesRef current;
- private Comparator<BytesRef> termComp;
static class TermsEnumIndex {
public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
@@ -95,36 +93,18 @@ public final class MultiTermsEnum extend
return current;
}
- @Override
- public Comparator<BytesRef> getComparator() {
- return termComp;
- }
-
/** The terms array must be newly created TermsEnum, ie
* {@link TermsEnum#next} has not yet been called. */
public TermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException {
assert termsEnumsIndex.length <= top.length;
numSubs = 0;
numTop = 0;
- termComp = null;
queue.clear();
for(int i=0;i<termsEnumsIndex.length;i++) {
final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i];
assert termsEnumIndex != null;
- // init our term comp
- if (termComp == null) {
- queue.termComp = termComp = termsEnumIndex.termsEnum.getComparator();
- } else {
- // We cannot merge sub-readers that have
- // different TermComps
- final Comparator<BytesRef> subTermComp = termsEnumIndex.termsEnum.getComparator();
- if (subTermComp != null && !subTermComp.equals(termComp)) {
- throw new IllegalStateException("sub-readers have different BytesRef.Comparators: " + subTermComp + " vs " + termComp + "; cannot merge");
- }
- }
-
final BytesRef term = termsEnumIndex.termsEnum.next();
if (term != null) {
final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex];
@@ -149,7 +129,7 @@ public final class MultiTermsEnum extend
numTop = 0;
boolean seekOpt = false;
- if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
+ if (lastSeek != null && lastSeek.compareTo(term) <= 0) {
seekOpt = true;
}
@@ -167,7 +147,7 @@ public final class MultiTermsEnum extend
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
- final int cmp = termComp.compare(term, curTerm);
+ final int cmp = term.compareTo(curTerm);
if (cmp == 0) {
status = true;
} else if (cmp < 0) {
@@ -201,7 +181,7 @@ public final class MultiTermsEnum extend
lastSeekExact = false;
boolean seekOpt = false;
- if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
+ if (lastSeek != null && lastSeek.compareTo(term) <= 0) {
seekOpt = true;
}
@@ -219,7 +199,7 @@ public final class MultiTermsEnum extend
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
- final int cmp = termComp.compare(term, curTerm);
+ final int cmp = term.compareTo(curTerm);
if (cmp == 0) {
status = SeekStatus.FOUND;
} else if (cmp < 0) {
@@ -519,14 +499,13 @@ public final class MultiTermsEnum extend
}
private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
- Comparator<BytesRef> termComp;
TermMergeQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
- final int cmp = termComp.compare(termsA.current, termsB.current);
+ final int cmp = termsA.current.compareTo(termsB.current);
if (cmp != 0) {
return cmp < 0;
} else {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Thu Sep 19 20:57:09 2013
@@ -22,9 +22,8 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FieldInfosWriter;
-import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfo.DocValuesType;
@@ -375,19 +374,10 @@ final class SegmentMerger {
docBase += maxDoc;
}
- final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
- boolean success = false;
- try {
- consumer.merge(mergeState,
- new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
- slices.toArray(ReaderSlice.EMPTY_ARRAY)));
- success = true;
- } finally {
- if (success) {
- IOUtils.close(consumer);
- } else {
- IOUtils.closeWhileHandlingException(consumer);
- }
- }
+ Fields mergedFields = new MappedMultiFields(mergeState,
+ new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
+ slices.toArray(ReaderSlice.EMPTY_ARRAY)));
+
+ codec.postingsFormat().fieldsConsumer(segmentWriteState).write(mergedFields);
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -125,11 +124,6 @@ class SortedDocValuesTermsEnum extends T
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -125,11 +124,6 @@ class SortedSetDocValuesTermsEnum extend
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java Thu Sep 19 20:57:09 2013
@@ -66,12 +66,6 @@ final class TermVectorsConsumer extends
hasVectors = false;
}
}
-
- for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
- TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField) field;
- perField.termsHashPerField.reset();
- perField.shrinkHash();
- }
}
/** Fills in no-term-vectors for all docs we haven't seen
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Thu Sep 19 20:57:09 2013
@@ -156,7 +156,7 @@ final class TermVectorsConsumerPerField
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
final TermVectorsWriter tv = termsWriter.writer;
- final int[] termIDs = termsHashPerField.sortPostings(tv.getComparator());
+ final int[] termIDs = termsHashPerField.sortPostings();
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);
@@ -191,11 +191,6 @@ final class TermVectorsConsumerPerField
fieldInfo.setStoreTermVectors();
}
- void shrinkHash() {
- termsHashPerField.shrinkHash(maxNumPostings);
- maxNumPostings = 0;
- }
-
@Override
void start(IndexableField f) {
if (doVectorOffsets) {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
@@ -75,13 +74,6 @@ public abstract class Terms {
}
}
- /** Return the BytesRef Comparator used to sort terms
- * provided by the iterator. This method may return null
- * if there are no terms. This method may be invoked
- * many times; it's best to cache a single instance &
- * reuse it. */
- public abstract Comparator<BytesRef> getComparator();
-
/** Returns the number of terms for this field, or -1 if this
* measure isn't stored by the codec. Note that, just like
* other term measures, this measure does not take deleted
@@ -109,6 +101,8 @@ public abstract class Terms {
* measures, this measure does not take deleted documents
* into account. */
public abstract int getDocCount() throws IOException;
+
+ // TODO: shouldn't we have hasFreq() as well?
/** Returns true if documents in this field store offsets. */
public abstract boolean hasOffsets();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
@@ -33,8 +32,9 @@ import org.apache.lucene.util.BytesRefIt
* #docs}.
*
* <p>Term enumerations are always ordered by
- * {@link #getComparator}. Each term in the enumeration is
- * greater than the one before it.</p>
+ * BytesRef.compareTo, which is Unicode sort
+ * order if the terms are UTF-8 bytes. Each term in the
+ * enumeration is greater than the one before it.</p>
*
* <p>The TermsEnum is unpositioned when you first obtain it
* and you must first successfully call {@link #next} or one
@@ -230,11 +230,6 @@ public abstract class TermsEnum implemen
}
@Override
- public Comparator<BytesRef> getComparator() {
- return null;
- }
-
- @Override
public int docFreq() {
throw new IllegalStateException("this method should never be called");
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.ByteBlockPool;
@@ -77,13 +76,7 @@ final class TermsHashPerField extends In
nextPerField = null;
}
- void shrinkHash(int targetSize) {
- // Fully free the bytesHash on each flush but keep the pool untouched
- // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too
- bytesHash.clear(false);
- }
-
- public void reset() {
+ void reset() {
bytesHash.clear(false);
if (nextPerField != null)
nextPerField.reset();
@@ -107,8 +100,8 @@ final class TermsHashPerField extends In
}
/** Collapse the hash table & sort in-place. */
- public int[] sortPostings(Comparator<BytesRef> termComp) {
- return bytesHash.sort(termComp);
+ public int[] sortPostings() {
+ return bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
private boolean doCall;
@@ -136,7 +129,8 @@ final class TermsHashPerField extends In
// Secondary entry point (for 2nd & subsequent TermsHash),
// because token text has already been "interned" into
- // textStart, so we hash by textStart
+ // textStart, so we hash by textStart. term vectors use
+ // this API.
public void add(int textStart) throws IOException {
int termID = bytesHash.addByPoolOffset(textStart);
if (termID >= 0) { // New posting
@@ -173,7 +167,8 @@ final class TermsHashPerField extends In
}
}
- // Primary entry point (for first TermsHash)
+ // Primary entry point (for first TermsHash); postings use
+ // this API.
@Override
void add() throws IOException {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java Thu Sep 19 20:57:09 2013
@@ -101,7 +101,7 @@ class ConstantScoreAutoRewrite extends T
} else {
final BooleanQuery bq = getTopLevelQuery();
final BytesRefHash pendingTerms = col.pendingTerms;
- final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
+ final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
for(int i = 0; i < size; i++) {
final int pos = sort[i];
// docFreq is not used for constant score here, we pass 1
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
@@ -26,7 +25,6 @@ import org.apache.lucene.index.SortedSet
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
@@ -91,11 +89,6 @@ public final class DocTermOrdsRewriteMet
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public TermsEnum iterator(TermsEnum reuse) {
return docTermOrds.termsEnum();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.Comparator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
@@ -26,7 +25,6 @@ import org.apache.lucene.index.SortedDoc
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
/**
@@ -91,11 +89,6 @@ public final class FieldCacheRewriteMeth
TermsEnum termsEnum = query.getTermsEnum(new Terms() {
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public TermsEnum iterator(TermsEnum reuse) {
return fcsi.termsEnum();
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -46,7 +46,7 @@ import org.apache.lucene.util.automaton.
* to the specified filter term.
*
* <p>Term enumerations are always ordered by
- * {@link #getComparator}. Each term in the enumeration is
+ * {@link BytesRef#compareTo}. Each term in the enumeration is
* greater than all that precede it.</p>
*/
public class FuzzyTermsEnum extends TermsEnum {
@@ -293,11 +293,6 @@ public class FuzzyTermsEnum extends Term
}
@Override
- public Comparator<BytesRef> getComparator() {
- return actualEnum.getComparator();
- }
-
- @Override
public long ord() throws IOException {
return actualEnum.ord();
}