You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/14 19:54:24 UTC
svn commit: r1433035 [3/4] - in /lucene/dev/branches/lucene4547: ./
dev-tools/ dev-tools/scripts/ lucene/ lucene/analysis/
lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/
lucene/analysis/kuromoji/src/resources/org/apache/lucene/an...
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java Mon Jan 14 18:54:22 2013
@@ -1,12 +1,10 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
-import java.util.Iterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -42,99 +40,75 @@ import org.apache.lucene.util.BytesRef;
*/
public class PayloadIterator {
- protected BytesRef data;
-
private TermsEnum reuseTE;
- private DocsAndPositionsEnum currentDPE;
+ private DocsAndPositionsEnum dpe;
private boolean hasMore;
- private int curDocID, curDocBase;
+ private int curDocID;
- private final Iterator<AtomicReaderContext> leaves;
private final Term term;
- public PayloadIterator(IndexReader indexReader, Term term) throws IOException {
- leaves = indexReader.leaves().iterator();
+ public PayloadIterator(Term term) throws IOException {
this.term = term;
}
- private void nextSegment() throws IOException {
+ /**
+ * Sets the {@link AtomicReaderContext} for which {@link #getPayload(int)}
+ * calls will be made. Returns true iff this reader has payload for any of the
+ * documents belonging to the {@link Term} given to the constructor.
+ */
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
hasMore = false;
- while (leaves.hasNext()) {
- AtomicReaderContext ctx = leaves.next();
- curDocBase = ctx.docBase;
- Fields fields = ctx.reader().fields();
- if (fields != null) {
- Terms terms = fields.terms(term.field());
- if (terms != null) {
- reuseTE = terms.iterator(reuseTE);
- if (reuseTE.seekExact(term.bytes(), true)) {
- // this class is usually used to iterate on whatever a Query matched
- // if it didn't match deleted documents, we won't receive them. if it
- // did, we should iterate on them too, therefore we pass liveDocs=null
- currentDPE = reuseTE.docsAndPositions(null, currentDPE, DocsAndPositionsEnum.FLAG_PAYLOADS);
- if (currentDPE != null && (curDocID = currentDPE.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- hasMore = true;
- break;
- }
+ Fields fields = context.reader().fields();
+ if (fields != null) {
+ Terms terms = fields.terms(term.field());
+ if (terms != null) {
+ reuseTE = terms.iterator(reuseTE);
+ if (reuseTE.seekExact(term.bytes(), true)) {
+ // this class is usually used to iterate on whatever a Query matched
+ // if it didn't match deleted documents, we won't receive them. if it
+ // did, we should iterate on them too, therefore we pass liveDocs=null
+ dpe = reuseTE.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
+ if (dpe != null && (curDocID = dpe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ hasMore = true;
}
}
}
}
- }
-
- /**
- * Initialize the iterator. Should be done before the first call to
- * {@link #getPayload(int)}. Returns {@code false} if no category list is
- * found, or the category list has no documents.
- */
- public boolean init() throws IOException {
- nextSegment();
return hasMore;
}
-
+
/**
* Returns the {@link BytesRef payload} of the given document, or {@code null}
* if the document does not exist, there are no more documents in the posting
- * list, or the document exists but has not payload. You should call
- * {@link #init()} before the first call to this method.
+ * list, or the document exists but has no payload. The given document IDs
+ * are treated as local to the reader given to
+ * {@link #setNextReader(AtomicReaderContext)}.
*/
public BytesRef getPayload(int docID) throws IOException {
if (!hasMore) {
return null;
}
- // re-basing docId->localDocID is done fewer times than currentDoc->globalDoc
- int localDocID = docID - curDocBase;
-
- if (curDocID > localDocID) {
+ if (curDocID > docID) {
// document does not exist
return null;
}
- if (curDocID < localDocID) {
- // look for the document either in that segment, or others
- while (hasMore && (curDocID = currentDPE.advance(localDocID)) == DocIdSetIterator.NO_MORE_DOCS) {
- nextSegment(); // also updates curDocID
- localDocID = docID - curDocBase;
- // nextSegment advances to nextDoc, so check if we still need to advance
- if (curDocID >= localDocID) {
- break;
+ if (curDocID < docID) {
+ curDocID = dpe.advance(docID);
+ if (curDocID != docID) { // requested document does not have a payload
+ if (curDocID == DocIdSetIterator.NO_MORE_DOCS) { // no more docs in this reader
+ hasMore = false;
}
- }
-
- // we break from the above loop when:
- // 1. we iterated over all segments (hasMore=false)
- // 2. current segment advanced to a doc, either requested or higher
- if (!hasMore || curDocID != localDocID) {
return null;
}
}
// we're on the document
- assert currentDPE.freq() == 1 : "expecting freq=1 (got " + currentDPE.freq() + ") term=" + term + " doc=" + (curDocID + curDocBase);
- int pos = currentDPE.nextPosition();
- assert pos != -1 : "no positions for term=" + term + " doc=" + (curDocID + curDocBase);
- return currentDPE.getPayload();
+ assert dpe.freq() == 1 : "expecting freq=1 (got " + dpe.freq() + ") term=" + term + " doc=" + curDocID;
+ int pos = dpe.nextPosition();
+ assert pos != -1 : "no positions for term=" + term + " doc=" + curDocID;
+ return dpe.getPayload();
}
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java Mon Jan 14 18:54:22 2013
@@ -62,7 +62,7 @@ public abstract class ScoredDocIdCollect
}
@Override
- public ScoredDocIDsIterator scoredDocIdsIterator() {
+ protected ScoredDocIDsIterator scoredDocIdsIterator() {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
@@ -129,7 +129,7 @@ public abstract class ScoredDocIdCollect
}
@Override
- public ScoredDocIDsIterator scoredDocIdsIterator() {
+ protected ScoredDocIDsIterator scoredDocIdsIterator() {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
@@ -189,8 +189,7 @@ public abstract class ScoredDocIdCollect
* do not require scoring, it is better to set it to <i>false</i>.
*/
public static ScoredDocIdCollector create(int maxDoc, boolean enableScoring) {
- return enableScoring ? new ScoringDocIdCollector(maxDoc)
- : new NonScoringDocIdCollector(maxDoc);
+ return enableScoring ? new ScoringDocIdCollector(maxDoc) : new NonScoringDocIdCollector(maxDoc);
}
private ScoredDocIdCollector(int maxDoc) {
@@ -198,13 +197,14 @@ public abstract class ScoredDocIdCollect
docIds = new FixedBitSet(maxDoc);
}
+ protected abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
+
/** Returns the default score used when scoring is disabled. */
public abstract float getDefaultScore();
/** Set the default score. Only applicable if scoring is disabled. */
public abstract void setDefaultScore(float defaultScore);
- public abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
public ScoredDocIDs getScoredDocIDs() {
return new ScoredDocIDs() {
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java Mon Jan 14 18:54:22 2013
@@ -4,22 +4,23 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.util.IntsRef;
-
import org.apache.lucene.facet.search.aggregator.Aggregator;
-import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -179,11 +180,11 @@ public class StandardFacetsAccumulator e
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : searchParams.getFacetRequests()) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
- IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
+ IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
continue; // do not add a null to the list.
}
- FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
+ FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
// final labeling if allowed (because labeling is a costly operation)
if (isAllowLabeling()) {
frHndlr.labelResult(facetRes);
@@ -213,18 +214,15 @@ public class StandardFacetsAccumulator e
/** Check if it is worth to use complements */
protected boolean shouldComplement(ScoredDocIDs docids) {
- return
- mayComplement() &&
- (docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
+ return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
}
/**
* Iterate over the documents for this partition and fill the facet arrays with the correct
* count/complement count/value.
- * @throws IOException If there is a low-level I/O error.
*/
- private final void fillArraysForPartition(ScoredDocIDs docids,
- FacetArrays facetArrays, int partition) throws IOException {
+ private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition)
+ throws IOException {
if (isUsingComplements) {
initArraysByTotalCounts(facetArrays, partition, docids.size());
@@ -236,27 +234,41 @@ public class StandardFacetsAccumulator e
IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
- CategoryListIterator categoryList = entry.getKey();
- if (!categoryList.init()) {
- continue;
- }
-
- Aggregator categorator = entry.getValue();
- ScoredDocIDsIterator iterator = docids.iterator();
+ final ScoredDocIDsIterator iterator = docids.iterator();
+ final CategoryListIterator categoryListIter = entry.getKey();
+ final Aggregator aggregator = entry.getValue();
+ Iterator<AtomicReaderContext> contexts = indexReader.leaves().iterator();
+ AtomicReaderContext current = null;
+ int maxDoc = -1;
while (iterator.next()) {
int docID = iterator.getDocID();
- categoryList.getOrdinals(docID, ordinals);
+ while (docID >= maxDoc) { // find the segment which contains this document
+ if (!contexts.hasNext()) {
+ throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?");
+ }
+ current = contexts.next();
+ maxDoc = current.docBase + current.reader().maxDoc();
+ if (docID < maxDoc) { // segment has docs, check if it has categories
+ boolean validSegment = categoryListIter.setNextReader(current);
+ validSegment &= aggregator.setNextReader(current);
+ if (!validSegment) { // if categoryList or aggregator say it's an invalid segment, skip all docs
+ while (docID < maxDoc && iterator.next()) {
+ docID = iterator.getDocID();
+ }
+ }
+ }
+ }
+ docID -= current.docBase;
+ categoryListIter.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
- continue;
+ continue; // document does not have category ordinals
}
- categorator.aggregate(docID, iterator.getScore(), ordinals);
+ aggregator.aggregate(docID, iterator.getScore(), ordinals);
}
}
}
- /**
- * Init arrays for partition by total counts, optionally applying a factor
- */
+ /** Init arrays for partition by total counts, optionally applying a factor */
private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
int[] intArray = facetArrays.getIntArray();
totalFacetCounts.fillTotalCountsForPartition(intArray, partition);
@@ -302,10 +314,9 @@ public class StandardFacetsAccumulator e
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
Aggregator categoryAggregator = facetRequest.createAggregator(
- isUsingComplements, facetArrays, indexReader, taxonomyReader);
+ isUsingComplements, facetArrays, taxonomyReader);
- CategoryListIterator cli =
- facetRequest.createCategoryListIterator(indexReader, taxonomyReader, searchParams, partition);
+ CategoryListIterator cli = facetRequest.createCategoryListIterator(taxonomyReader, searchParams, partition);
// get the aggregator
Aggregator old = categoryLists.put(cli, categoryAggregator);
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java Mon Jan 14 18:54:22 2013
@@ -170,7 +170,7 @@ public class TotalFacetCounts {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
- final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
+ final CategoryListIterator cli = clIteraor(clCache, clp, partition);
map.put(cli, aggregator);
}
return map;
@@ -181,14 +181,14 @@ public class TotalFacetCounts {
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}
- static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp,
- IndexReader indexReader, int partition) throws IOException {
+ static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp, int partition)
+ throws IOException {
if (clCache != null) {
CategoryListData cld = clCache.get(clp);
if (cld != null) {
return cld.iterator(partition);
}
}
- return clp.createCategoryListIterator(indexReader, partition);
+ return clp.createCategoryListIterator(partition);
}
}
\ No newline at end of file
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java Mon Jan 14 18:54:22 2013
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.a
import java.io.IOException;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@@ -22,22 +23,23 @@ import org.apache.lucene.util.IntsRef;
*/
/**
- * An Aggregator is the analogue of Lucene's Collector (see
- * {@link org.apache.lucene.search.Collector}), for processing the categories
- * belonging to a certain document. The Aggregator is responsible for doing
- * whatever it wishes with the categories it is fed, e.g., counting the number
- * of times that each category appears, or performing some computation on their
- * association values.
- * <P>
- * Much of the function of an Aggregator implementation is not described by this
- * interface. This includes the constructor and getter methods to retrieve the
- * results of the aggregation.
+ * Aggregates the categories of documents given to
+ * {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local
+ * to the reader given to {@link #setNextReader(AtomicReaderContext)}.
*
* @lucene.experimental
*/
public interface Aggregator {
/**
+ * Sets the {@link AtomicReaderContext} for which
+ * {@link #aggregate(int, float, IntsRef)} calls will be made. If this method
+ * returns false, {@link #aggregate(int, float, IntsRef)} should not be called
+ * for this reader.
+ */
+ public boolean setNextReader(AtomicReaderContext context) throws IOException;
+
+ /**
* Aggregate the ordinals of the given document ID (and its score). The given
* ordinals offset is always zero.
*/
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java Mon Jan 14 18:54:22 2013
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.a
import java.io.IOException;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@@ -57,4 +58,9 @@ public class CountingAggregator implemen
return counterArray == null ? 0 : counterArray.hashCode();
}
+ @Override
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ return true;
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java Mon Jan 14 18:54:22 2013
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.a
import java.io.IOException;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@@ -58,4 +59,9 @@ public class ScoringAggregator implement
return hashCode;
}
+ @Override
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ return true;
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java Mon Jan 14 18:54:22 2013
@@ -6,7 +6,7 @@ import org.apache.lucene.facet.associati
import org.apache.lucene.facet.associations.FloatAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToFloatMap;
@@ -39,13 +39,13 @@ public class AssociationFloatSumAggregat
protected final float[] sumArray;
protected final FloatAssociationsPayloadIterator associations;
- public AssociationFloatSumAggregator(IndexReader reader, float[] sumArray) throws IOException {
- this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
+ public AssociationFloatSumAggregator(float[] sumArray) throws IOException {
+ this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}
- public AssociationFloatSumAggregator(String field, IndexReader reader, float[] sumArray) throws IOException {
+ public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException {
this.field = field;
- associations = new FloatAssociationsPayloadIterator(reader, field, new CategoryFloatAssociation());
+ associations = new FloatAssociationsPayloadIterator(field, new CategoryFloatAssociation());
this.sumArray = sumArray;
}
@@ -76,4 +76,9 @@ public class AssociationFloatSumAggregat
return field.hashCode();
}
+ @Override
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ return associations.setNextReader(context);
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java Mon Jan 14 18:54:22 2013
@@ -6,7 +6,7 @@ import org.apache.lucene.facet.associati
import org.apache.lucene.facet.associations.IntAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToIntMap;
@@ -39,13 +39,13 @@ public class AssociationIntSumAggregator
protected final int[] sumArray;
protected final IntAssociationsPayloadIterator associations;
- public AssociationIntSumAggregator(IndexReader reader, int[] sumArray) throws IOException {
- this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
+ public AssociationIntSumAggregator(int[] sumArray) throws IOException {
+ this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}
- public AssociationIntSumAggregator(String field, IndexReader reader, int[] sumArray) throws IOException {
+ public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException {
this.field = field;
- associations = new IntAssociationsPayloadIterator(reader, field, new CategoryIntAssociation());
+ associations = new IntAssociationsPayloadIterator(field, new CategoryIntAssociation());
this.sumArray = sumArray;
}
@@ -76,4 +76,9 @@ public class AssociationIntSumAggregator
return field.hashCode();
}
+ @Override
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ return associations.setNextReader(context);
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java Mon Jan 14 18:54:22 2013
@@ -6,6 +6,7 @@ import org.apache.lucene.facet.index.par
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
@@ -56,25 +57,30 @@ public class CategoryListData {
}
/** Compute category list data for caching for faster iteration. */
- CategoryListData(IndexReader reader, TaxonomyReader taxo,
- FacetIndexingParams iparams, CategoryListParams clp) throws IOException {
+ CategoryListData(IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams, CategoryListParams clp)
+ throws IOException {
- final int maxDoc = reader.maxDoc();
- int[][][]dpf = new int[maxDoc][][];
+ int[][][]dpf = new int[reader.maxDoc()][][];
int numPartitions = (int)Math.ceil(taxo.getSize()/(double)iparams.getPartitionSize());
IntsRef ordinals = new IntsRef(32);
for (int part = 0; part < numPartitions; part++) {
- CategoryListIterator cli = clp.createCategoryListIterator(reader, part);
- if (cli.init()) {
- for (int doc = 0; doc < maxDoc; doc++) {
- cli.getOrdinals(doc, ordinals);
- if (ordinals.length > 0) {
- if (dpf[doc] == null) {
- dpf[doc] = new int[numPartitions][];
- }
- dpf[doc][part] = new int[ordinals.length];
- for (int i = 0; i < ordinals.length; i++) {
- dpf[doc][part][i] = ordinals.ints[i];
+ for (AtomicReaderContext context : reader.leaves()) {
+ CategoryListIterator cli = clp.createCategoryListIterator(part);
+ if (cli.setNextReader(context)) {
+ final int maxDoc = context.reader().maxDoc();
+ for (int i = 0; i < maxDoc; i++) {
+ cli.getOrdinals(i, ordinals);
+ if (ordinals.length > 0) {
+ int doc = i + context.docBase;
+ if (dpf[doc] == null) {
+ dpf[doc] = new int[numPartitions][];
+ }
+ if (dpf[doc][part] == null) {
+ dpf[doc][part] = new int[ordinals.length];
+ }
+ for (int j = 0; j < ordinals.length; j++) {
+ dpf[doc][part][j] = ordinals.ints[j];
+ }
}
}
}
@@ -93,6 +99,7 @@ public class CategoryListData {
/** Internal: category list iterator over uncompressed category info in RAM */
private static class RAMCategoryListIterator implements CategoryListIterator {
+ private int docBase;
private final int part;
private final int[][][] dpc;
@@ -102,13 +109,15 @@ public class CategoryListData {
}
@Override
- public boolean init() throws IOException {
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ docBase = context.docBase;
return dpc != null && dpc.length > part;
}
-
+
@Override
public void getOrdinals(int docID, IntsRef ints) throws IOException {
ints.length = 0;
+ docID += docBase;
if (dpc.length > docID && dpc[docID] != null && dpc[docID][part] != null) {
if (ints.ints.length < dpc[docID][part].length) {
ints.grow(dpc[docID][part].length);
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java Mon Jan 14 18:54:22 2013
@@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;
-import org.apache.lucene.index.IndexReader;
-
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ComplementCountingAggregator;
@@ -47,8 +45,7 @@ public class CountFacetRequest extends F
}
@Override
- public Aggregator createAggregator(boolean useComplements,
- FacetArrays arrays, IndexReader reader, TaxonomyReader taxonomy) {
+ public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
// we rely on that, if needed, result is cleared by arrays!
int[] a = arrays.getIntArray();
if (useComplements) {
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java Mon Jan 14 18:54:22 2013
@@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.p
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
-
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.FacetArrays;
@@ -11,8 +9,8 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.search.aggregator.Aggregator;
-import org.apache.lucene.facet.search.cache.CategoryListData;
import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.search.cache.CategoryListData;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@@ -314,33 +312,29 @@ public abstract class FacetRequest imple
* computation.
* @param arrays
* provider for facet arrays in use for current computation.
- * @param indexReader
- * index reader in effect.
* @param taxonomy
* reader of taxonomy in effect.
* @throws IOException If there is a low-level I/O error.
*/
- public abstract Aggregator createAggregator(boolean useComplements,
- FacetArrays arrays, IndexReader indexReader,
- TaxonomyReader taxonomy) throws IOException;
+ public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
+ throws IOException;
/**
- * Create the category list iterator for the specified partition.
- * If a non null cache is provided which contains the required data,
- * use it for the iteration.
+ * Create the category list iterator for the specified partition. If a non
+ * null cache is provided which contains the required data, use it for the
+ * iteration.
*/
- public CategoryListIterator createCategoryListIterator(IndexReader reader,
- TaxonomyReader taxo, FacetSearchParams sParams, int partition)
+ public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams, int partition)
throws IOException {
CategoryListCache clCache = sParams.getCategoryListCache();
CategoryListParams clParams = sParams.getFacetIndexingParams().getCategoryListParams(categoryPath);
- if (clCache!=null) {
+ if (clCache != null) {
CategoryListData clData = clCache.get(clParams);
- if (clData!=null) {
+ if (clData != null) {
return clData.iterator(partition);
}
}
- return clParams.createCategoryListIterator(reader, partition);
+ return clParams.createCategoryListIterator(partition);
}
/**
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java Mon Jan 14 18:54:22 2013
@@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;
-import org.apache.lucene.index.IndexReader;
-
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ScoringAggregator;
@@ -38,9 +36,7 @@ public class ScoreFacetRequest extends F
}
@Override
- public Aggregator createAggregator(boolean useComplements,
- FacetArrays arrays, IndexReader reader,
- TaxonomyReader taxonomy) {
+ public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
assert !useComplements : "complements are not supported by this FacetRequest";
return new ScoringAggregator(arrays.getFloatArray());
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java Mon Jan 14 18:54:22 2013
@@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.p
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
-
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationFloatSumAggregator;
@@ -45,10 +43,10 @@ public class AssociationFloatSumFacetReq
}
@Override
- public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
- TaxonomyReader taxonomy) throws IOException {
+ public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
+ throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
- return new AssociationFloatSumAggregator(reader, arrays.getFloatArray());
+ return new AssociationFloatSumAggregator(arrays.getFloatArray());
}
@Override
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java Mon Jan 14 18:54:22 2013
@@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.p
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
-
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationIntSumAggregator;
@@ -45,10 +43,10 @@ public class AssociationIntSumFacetReque
}
@Override
- public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
- TaxonomyReader taxonomy) throws IOException {
+ public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
+ throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
- return new AssociationIntSumAggregator(reader, arrays.getIntArray());
+ return new AssociationIntSumAggregator(arrays.getIntArray());
}
@Override
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java Mon Jan 14 18:54:22 2013
@@ -60,6 +60,7 @@ public abstract class Sampler {
/**
* Construct with certain {@link SamplingParams}
+ *
* @param params sampling params in effect
* @throws IllegalArgumentException if the provided SamplingParams are not valid
*/
@@ -110,16 +111,15 @@ public abstract class Sampler {
* @param sampleSetSize required size of sample set
* @return sample of the input set in the required size
*/
- protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize,
- int sampleSetSize) throws IOException;
+ protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize)
+ throws IOException;
/**
* Get a fixer of sample facet accumulation results. Default implementation
* returns a <code>TakmiSampleFixer</code> which is adequate only for
* counting. For any other accumulator, provide a different fixer.
*/
- public SampleFixer getSampleFixer(
- IndexReader indexReader, TaxonomyReader taxonomyReader,
+ public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
FacetSearchParams searchParams) {
return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
}
@@ -161,10 +161,10 @@ public abstract class Sampler {
OverSampledFacetRequest sampledFreq = null;
try {
- sampledFreq = (OverSampledFacetRequest)facetResult.getFacetRequest();
+ sampledFreq = (OverSampledFacetRequest) facetResult.getFacetRequest();
} catch (ClassCastException e) {
throw new IllegalArgumentException(
- "It is only valid to call this method with result obtained for a" +
+ "It is only valid to call this method with result obtained for a " +
"facet request created through sampler.overSamlpingSearchParams()",
e);
}
@@ -215,19 +215,15 @@ public abstract class Sampler {
}
@Override
- public CategoryListIterator createCategoryListIterator(IndexReader reader,
- TaxonomyReader taxo, FacetSearchParams sParams, int partition)
- throws IOException {
- return orig.createCategoryListIterator(reader, taxo, sParams, partition);
+ public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams,
+ int partition) throws IOException {
+ return orig.createCategoryListIterator(taxo, sParams, partition);
}
-
@Override
- public Aggregator createAggregator(boolean useComplements,
- FacetArrays arrays, IndexReader indexReader,
- TaxonomyReader taxonomy) throws IOException {
- return orig.createAggregator(useComplements, arrays, indexReader,
- taxonomy);
+ public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
+ throws IOException {
+ return orig.createAggregator(useComplements, arrays, taxonomy);
}
@Override
@@ -245,4 +241,5 @@ public abstract class Sampler {
return orig.supportsComplements();
}
}
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java Mon Jan 14 18:54:22 2013
@@ -91,8 +91,7 @@ class TakmiSampleFixer implements Sample
* full set of matching documents.
* @throws IOException If there is a low-level I/O error.
*/
- private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
- throws IOException {
+ private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
// TODO (Facet): change from void to return the new, smaller docSet, and use
// that for the children, as this will make their intersection ops faster.
// can do this only when the new set is "sufficiently" smaller.
@@ -109,8 +108,7 @@ class TakmiSampleFixer implements Sample
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
- 0),
- docIds.iterator());
+ 0), docIds.iterator());
fresNode.setValue(updatedCount);
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java Mon Jan 14 18:54:22 2013
@@ -5,6 +5,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.search.CategoryListIterator;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@@ -42,9 +43,10 @@ public class MultiCategoryListIterator i
}
@Override
- public boolean init() throws IOException {
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ validIterators.clear();
for (CategoryListIterator cli : iterators) {
- if (cli.init()) {
+ if (cli.setNextReader(context)) {
validIterators.add(cli);
}
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java Mon Jan 14 18:54:22 2013
@@ -3,17 +3,18 @@ package org.apache.lucene.facet.util;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.ScoredDocIDsIterator;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -49,48 +50,57 @@ public class ScoredDocIdsUtils {
* @param reader holding the number of documents & information about deletions.
*/
public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
- throws IOException {
+ throws IOException {
final int maxDoc = reader.maxDoc();
DocIdSet docIdSet = docids.getDocIDs();
- final OpenBitSet complement;
- if (docIdSet instanceof OpenBitSet) {
+ final FixedBitSet complement;
+ if (docIdSet instanceof FixedBitSet) {
// That is the most common case, if ScoredDocIdsCollector was used.
- complement = ((OpenBitSet) docIdSet).clone();
+ complement = ((FixedBitSet) docIdSet).clone();
} else {
- complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
+ complement = new FixedBitSet(maxDoc);
+ DocIdSetIterator iter = docIdSet.iterator();
+ int doc;
+ while ((doc = iter.nextDoc()) < maxDoc) {
+ complement.set(doc);
+ }
}
-
complement.flip(0, maxDoc);
-
- // Remove all Deletions from the complement set
clearDeleted(reader, complement);
return createScoredDocIds(complement, maxDoc);
}
-
- /**
- * Clear all deleted documents from a given open-bit-set according to a given reader
- */
- private static void clearDeleted(final IndexReader reader,
- final OpenBitSet set) throws IOException {
-
+
+ /** Clear all deleted documents from a given open-bit-set according to a given reader */
+ private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
+
// If there are no deleted docs
if (!reader.hasDeletions()) {
return; // return immediately
}
- Bits bits = MultiFields.getLiveDocs(reader);
-
DocIdSetIterator it = set.iterator();
- int doc = DocIdSetIterator.NO_MORE_DOCS;
- while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (!bits.get(doc)) {
- set.fastClear(doc);
+ int doc = it.nextDoc();
+ for (AtomicReaderContext context : reader.leaves()) {
+ AtomicReader r = context.reader();
+ final int maxDoc = r.maxDoc() + context.docBase;
+ if (doc >= maxDoc) { // skip this segment
+ continue;
+ }
+ if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions
+ while ((doc = it.nextDoc()) < maxDoc) {}
+ continue;
}
+ Bits liveDocs = r.getLiveDocs();
+ do {
+ if (!liveDocs.get(doc - context.docBase)) {
+ set.clear(doc);
+ }
+ } while ((doc = it.nextDoc()) < maxDoc);
}
}
-
+
/**
* Create a subset of an existing ScoredDocIDs object.
*
@@ -274,8 +284,7 @@ public class ScoredDocIdsUtils {
if (target <= next) {
target = next + 1;
}
- return next = target >= maxDoc ? NO_MORE_DOCS
- : target;
+ return next = target >= maxDoc ? NO_MORE_DOCS : target;
}
@Override
@@ -420,4 +429,5 @@ public class ScoredDocIdsUtils {
}
}
}
+
}
\ No newline at end of file
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java Mon Jan 14 18:54:22 2013
@@ -317,8 +317,7 @@ public abstract class FacetTestBase exte
}
/** Validate results equality */
- protected static void assertSameResults(List<FacetResult> expected,
- List<FacetResult> actual) {
+ protected static void assertSameResults(List<FacetResult> expected, List<FacetResult> actual) {
String expectedResults = resStringValueOnly(expected);
String actualResults = resStringValueOnly(actual);
if (!expectedResults.equals(actualResults)) {
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java Mon Jan 14 18:54:22 2013
@@ -29,12 +29,11 @@ import org.apache.lucene.facet.taxonomy.
public class AdaptiveAccumulatorTest extends BaseSampleTestTopK {
@Override
- protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
- TaxonomyReader taxoReader, IndexReader indexReader,
- FacetSearchParams searchParams) {
- AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams,
- indexReader, taxoReader);
+ protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
+ IndexReader indexReader, FacetSearchParams searchParams) {
+ AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader);
res.setSampler(sampler);
return res;
}
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java Mon Jan 14 18:54:22 2013
@@ -14,6 +14,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -106,30 +107,31 @@ public class CategoryListIteratorTest ex
IndexReader reader = writer.getReader();
writer.close();
- IntsRef ordinals = new IntsRef();
- CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
- cli.init();
int totalCategories = 0;
- for (int i = 0; i < data.length; i++) {
- Set<Integer> values = new HashSet<Integer>();
- for (int j = 0; j < data[i].length; j++) {
- values.add(data[i].ints[j]);
- }
- cli.getOrdinals(i, ordinals);
- assertTrue("no ordinals for document " + i, ordinals.length > 0);
- for (int j = 0; j < ordinals.length; j++) {
- assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
+ IntsRef ordinals = new IntsRef();
+ CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
+ for (AtomicReaderContext context : reader.leaves()) {
+ cli.setNextReader(context);
+ int maxDoc = context.reader().maxDoc();
+ int dataIdx = context.docBase;
+ for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
+ Set<Integer> values = new HashSet<Integer>();
+ for (int j = 0; j < data[dataIdx].length; j++) {
+ values.add(data[dataIdx].ints[j]);
+ }
+ cli.getOrdinals(doc, ordinals);
+ assertTrue("no ordinals for document " + doc, ordinals.length > 0);
+ for (int j = 0; j < ordinals.length; j++) {
+ assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
+ }
+ totalCategories += ordinals.length;
}
- totalCategories += ordinals.length;
}
- assertEquals("Missing categories!",10,totalCategories);
+ assertEquals("Missing categories!", 10, totalCategories);
reader.close();
dir.close();
}
- /**
- * Test that a document with no payloads does not confuse the payload decoder.
- */
@Test
public void testPayloadIteratorWithInvalidDoc() throws Exception {
Directory dir = newDirectory();
@@ -160,24 +162,28 @@ public class CategoryListIteratorTest ex
IndexReader reader = writer.getReader();
writer.close();
- IntsRef ordinals = new IntsRef();
- CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
- assertTrue("Failed to initialize payload iterator", cli.init());
int totalCategories = 0;
- for (int i = 0; i < data.length; i++) {
- Set<Integer> values = new HashSet<Integer>();
- for (int j = 0; j < data[i].length; j++) {
- values.add(data[i].ints[j]);
- }
- cli.getOrdinals(i, ordinals);
- if (i == 0) {
- assertTrue("document 0 must have a payload", ordinals.length > 0);
- for (int j = 0; j < ordinals.length; j++) {
- assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
+ IntsRef ordinals = new IntsRef();
+ CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
+ for (AtomicReaderContext context : reader.leaves()) {
+ cli.setNextReader(context);
+ int maxDoc = context.reader().maxDoc();
+ int dataIdx = context.docBase;
+ for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
+ Set<Integer> values = new HashSet<Integer>();
+ for (int j = 0; j < data[dataIdx].length; j++) {
+ values.add(data[dataIdx].ints[j]);
+ }
+ cli.getOrdinals(doc, ordinals);
+ if (dataIdx == 0) {
+ assertTrue("document 0 must have a payload", ordinals.length > 0);
+ for (int j = 0; j < ordinals.length; j++) {
+ assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
+ }
+ totalCategories += ordinals.length;
+ } else {
+ assertTrue("only document 0 should have a payload", ordinals.length == 0);
}
- totalCategories += ordinals.length;
- } else {
- assertTrue("only document 0 should have a payload", ordinals.length == 0);
}
}
assertEquals("Wrong number of total categories!", 2, totalCategories);
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java Mon Jan 14 18:54:22 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.facet.search.pa
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.index.AtomicReaderContext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -132,8 +133,8 @@ public class TestCategoryListCache exten
}
}
@Override
- public boolean init() throws IOException {
- return it.init();
+ public boolean setNextReader(AtomicReaderContext context) throws IOException {
+ return it.setNextReader(context);
}
};
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java Mon Jan 14 18:54:22 2013
@@ -17,6 +17,7 @@ import org.apache.lucene.facet.taxonomy.
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.MultiCategoryListIterator;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@@ -100,21 +101,24 @@ public class MultiCategoryListIteratorTe
clCache.loadAndRegister(clp, indexReader, taxoReader, indexingParams);
iterators[i] = clCache.get(clp).iterator(0); // no partitions
} else {
- iterators[i] = new PayloadCategoryListIteraor(indexReader, clp.getTerm(), decoder);
+ iterators[i] = new PayloadCategoryListIteraor(clp.getTerm(), decoder);
}
}
MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators);
- assertTrue("failed to init multi-iterator", cli.init());
- IntsRef ordinals = new IntsRef();
- int maxDoc = indexReader.maxDoc();
- for (int i = 0; i < maxDoc; i++) {
- cli.getOrdinals(i, ordinals);
- assertTrue("document " + i + " does not have categories", ordinals.length > 0);
- for (int j = 0; j < ordinals.length; j++) {
- CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
- assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
- if (cp.length == 2) {
- assertEquals("invalid category for document " + i, i, Integer.parseInt(cp.components[1]));
+ for (AtomicReaderContext context : indexReader.leaves()) {
+ assertTrue("failed to init multi-iterator", cli.setNextReader(context));
+ IntsRef ordinals = new IntsRef();
+ final int maxDoc = context.reader().maxDoc();
+ for (int i = 0; i < maxDoc; i++) {
+ cli.getOrdinals(i, ordinals);
+ assertTrue("document " + i + " does not have categories", ordinals.length > 0);
+ for (int j = 0; j < ordinals.length; j++) {
+ CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
+ assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
+ if (cp.length == 2) {
+ int globalDoc = i + context.docBase;
+ assertEquals("invalid category for document " + globalDoc, globalDoc, Integer.parseInt(cp.components[1]));
+ }
}
}
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java Mon Jan 14 18:54:22 2013
@@ -59,9 +59,8 @@ public abstract class BaseSampleTestTopK
return res;
}
- protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
- TaxonomyReader taxoReader, IndexReader indexReader,
- FacetSearchParams searchParams);
+ protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
+ IndexReader indexReader, FacetSearchParams searchParams);
/**
* Try out faceted search with sampling enabled and complements either disabled or enforced
@@ -89,7 +88,7 @@ public abstract class BaseSampleTestTopK
// try several times in case of failure, because the test has a chance to fail
// if the top K facets are not sufficiently common with the sample set
- for (int nTrial=0; nTrial<RETRIES; nTrial++) {
+ for (int nTrial = 0; nTrial < RETRIES; nTrial++) {
try {
// complement with sampling!
final Sampler sampler = createSampler(nTrial, docCollector.getScoredDocIDs(), useRandomSampler);
@@ -99,7 +98,7 @@ public abstract class BaseSampleTestTopK
break; // succeeded
} catch (NotSameResultError e) {
- if (nTrial>=RETRIES-1) {
+ if (nTrial >= RETRIES - 1) {
throw e; // no more retries allowed, must fail
}
}
@@ -119,14 +118,11 @@ public abstract class BaseSampleTestTopK
assertSameResults(expected, sampledResults);
}
- private FacetsCollector samplingCollector(
- final boolean complement,
- final Sampler sampler,
+ private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
FacetSearchParams samplingSearchParams) {
FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
@Override
- protected FacetsAccumulator initFacetsAccumulator(
- FacetSearchParams facetSearchParams, IndexReader indexReader,
+ protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
FacetsAccumulator acc = getSamplingAccumulator(sampler, taxonomyReader, indexReader, facetSearchParams);
acc.setComplementThreshold(complement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT);
@@ -144,12 +140,13 @@ public abstract class BaseSampleTestTopK
samplingParams.setMinSampleSize((int) (100 * retryFactor));
samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
samplingParams.setOversampleFactor(5.0 * retryFactor);
+ samplingParams.setSamplingThreshold(11000); //force sampling
- samplingParams.setSamplingThreshold(11000); //force sampling
Sampler sampler = useRandomSampler ?
new RandomSampler(samplingParams, new Random(random().nextLong())) :
new RepeatableSampler(samplingParams);
assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
return sampler;
}
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java Mon Jan 14 18:54:22 2013
@@ -9,6 +9,9 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.ScoredDocIDsIterator;
+import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
@@ -21,14 +24,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.lucene.util.OpenBitSetDISI;
-import org.junit.Test;
-
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-import org.apache.lucene.facet.search.ScoredDocIdCollector;
+import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -52,21 +50,21 @@ public class TestScoredDocIDsUtils exten
@Test
public void testComplementIterator() throws Exception {
final int n = atLeast(10000);
- final OpenBitSet bits = new OpenBitSet(n);
- for (int i = 0; i < 5 * n; i++) {
- bits.flip(random().nextInt(n));
+ final FixedBitSet bits = new FixedBitSet(n);
+ Random random = random();
+ for (int i = 0; i < n; i++) {
+ int idx = random.nextInt(n);
+ bits.flip(idx, idx + 1);
}
- OpenBitSet verify = new OpenBitSet(n);
- verify.or(bits);
+ FixedBitSet verify = new FixedBitSet(bits);
ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n);
Directory dir = newDirectory();
- IndexReader reader = createReaderWithNDocs(random(), n, dir);
+ IndexReader reader = createReaderWithNDocs(random, n, dir);
try {
- assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs,
- reader).size());
+ assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs, reader).size());
} finally {
reader.close();
dir.close();
@@ -147,7 +145,7 @@ public class TestScoredDocIDsUtils exten
searcher.search(q, collector);
ScoredDocIDs scoredDocIds = collector.getScoredDocIDs();
- OpenBitSet resultSet = new OpenBitSetDISI(scoredDocIds.getDocIDs().iterator(), reader.maxDoc());
+ FixedBitSet resultSet = (FixedBitSet) scoredDocIds.getDocIDs();
// Getting the complement set of the query result
ScoredDocIDs complementSet = ScoredDocIdsUtils.getComplementSet(scoredDocIds, reader);
@@ -164,12 +162,11 @@ public class TestScoredDocIDsUtils exten
assertFalse(
"Complement-Set must not contain deleted documents (doc="+docNum+")",
live != null && !live.get(docNum));
- assertNull(
- "Complement-Set must not contain docs from the original set (doc="+ docNum+")",
+ assertNull("Complement-Set must not contain docs from the original set (doc="+ docNum+")",
reader.document(docNum).getField("del"));
assertFalse(
"Complement-Set must not contain docs from the original set (doc="+docNum+")",
- resultSet.fastGet(docNum));
+ resultSet.get(docNum));
}
} finally {
reader.close();
Modified: lucene/dev/branches/lucene4547/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (original)
+++ lucene/dev/branches/lucene4547/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java Mon Jan 14 18:54:22 2013
@@ -24,6 +24,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
+import org.apache.lucene.util.packed.PackedInts;
/**
* Finite state automata based implementation of "autocomplete" functionality.
@@ -237,7 +238,8 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<Object>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
- shareMaxTailLength, outputs, null, false, true);
+ shareMaxTailLength, outputs, null, false,
+ PackedInts.DEFAULT, true, 15);
BytesRef scratch = new BytesRef();
BytesRef entry;
Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java Mon Jan 14 18:54:22 2013
@@ -40,6 +40,7 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.packed.PackedInts;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -288,7 +289,16 @@ public class FSTTester<T> {
outputs,
null,
willRewrite,
- true);
+ PackedInts.DEFAULT,
+ true,
+ 15);
+ if (LuceneTestCase.VERBOSE) {
+ if (willRewrite) {
+ System.out.println("TEST: packed FST");
+ } else {
+ System.out.println("TEST: non-packed FST");
+ }
+ }
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {
Modified: lucene/dev/branches/lucene4547/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/CHANGES.txt?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/solr/CHANGES.txt Mon Jan 14 18:54:22 2013
@@ -41,8 +41,6 @@ Detailed Change List
Other Changes
----------------------
-* SOLR-3735: Relocate the example mime-to-extension mapping, and
- upgrade Velocity Engine to 1.7 (ehatcher)
================== 4.1.0 ==================
@@ -50,14 +48,14 @@ Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.6.2
-Velocity 1.6.4 and Velocity Tools 2.0
+Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5
Upgrading from Solr 4.0.0
----------------------
-Custom java parsing plugins need to migrade from throwing the internal
+Custom java parsing plugins need to migrate from throwing the internal
ParseException to throwing SyntaxError.
BaseDistributedSearchTestCase now randomizes the servlet context it uses when
@@ -150,7 +148,7 @@ New Features
CoreAdmin API the same way as the data directory. (Mark Miller)
* SOLR-4028: When using ZK chroot, it would be nice if Solr would create the
- initial path when it doesn't exist. (Tomas Fernandez Lobbe via Mark Miller)
+ initial path when it doesn't exist. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-3948: Calculate/display deleted documents in admin interface.
(Shawn Heisey via Mark Miller)
@@ -209,6 +207,9 @@ New Features
* SOLR-2201: DIH's "formatDate" function now supports a timezone as an optional
fourth parameter (James Dyer, Mark Waddle)
+* SOLR-4302: New parameter 'indexInfo' (defaults to true) in CoreAdmin STATUS
+ command can be used to omit index specific information (Shahar Davidson via shalin)
+
Optimizations
----------------------
@@ -226,12 +227,12 @@ Optimizations
dynamicField's (steffkes)
* SOLR-3941: The "commitOnLeader" part of distributed recovery can use
- openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller)
+ openSearcher=false. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-4063: Allow CoreContainer to load multiple SolrCores in parallel rather
than just serially. (Mark Miller)
-* SOLR-4199: When doing zk retries due to connectionloss, rather than just
+* SOLR-4199: When doing zk retries due to connection loss, rather than just
retrying for 2 minutes, retry in proportion to the session timeout.
(Mark Miller)
@@ -250,6 +251,10 @@ Optimizations
* SOLR-3982: Admin UI: Various Dataimport Improvements (steffkes)
+* SOLR-4296: Admin UI: Improve Dataimport Auto-Refresh (steffkes)
+
+* SOLR-3458: Allow multiple Items to stay open on Plugins-Page (steffkes)
+
Bug Fixes
----------------------
@@ -362,7 +367,7 @@ Bug Fixes
* SOLR-4081: QueryParsing.toString, used during debugQuery=true, did not
correctly handle ExtendedQueries such as WrappedQuery
- (used when cache=false), spatial queries, and frange queires.
+ (used when cache=false), spatial queries, and frange queries.
(Eirik Lygre, yonik)
* SOLR-3959: Ensure the internal comma separator of poly fields is escaped
@@ -403,7 +408,7 @@ Bug Fixes
* SOLR-4162: ZkCli usage examples are not correct because the zkhost parameter
is not present and it is mandatory for all commands.
- (Tomas Fernandez Lobbe via Mark Miller)
+ (Tomás Fernández Löbbe via Mark Miller)
* SOLR-4071: Validate that name is pass to Collections API create, and behave the
same way as on startup when collection.configName is not explicitly passed.
@@ -495,7 +500,7 @@ Bug Fixes
* SOLR-4279: Wrong exception message if _version_ field is multivalued (shalin)
* SOLR-4170: The 'backup' ReplicationHandler command can sometimes use a stale
- index directory rather than the current one. (Mark Miller, Marcin Rzewuck)
+ index directory rather than the current one. (Mark Miller, Marcin Rzewucki)
* SOLR-3876: Solr Admin UI is completely dysfunctional on IE 9 (steffkes)
@@ -503,6 +508,17 @@ Bug Fixes
import works fine with SolrCloud clusters (Deniz Durmus, James Dyer,
Erick Erickson, shalin)
+* SOLR-4291: Harden the Overseer work queue thread loop. (Mark Miller)
+
+* SOLR-3820: Solr Admin Query form is missing some edismax request parameters
+ (steffkes)
+
+* SOLR-4217: post.jar no longer ignores -Dparams when -Durl is used.
+ (Alexandre Rafalovitch, ehatcher)
+
+* SOLR-4303: On replication, if the generation of the master is lower than the
+ slave we need to force a full copy of the index. (Mark Miller, Gregg Donovan)
+
Other Changes
----------------------
@@ -580,6 +596,16 @@ Other Changes
* SOLR-4208: ExtendedDismaxQParserPlugin has been refactored to make
subclassing easier. (Tomás Fernández Löbbe, hossman)
+* SOLR-3735: Relocate the example mime-to-extension mapping, and
+ upgrade Velocity Engine to 1.7 (ehatcher)
+
+* SOLR-4287: Removed "apache-" prefix from Solr distribution and artifact
+ filenames. (Ryan Ernst, Robert Muir, Steve Rowe)
+
+* SOLR-4016: Deduplication does not work with atomic/partial updates so
+ disallow atomic update requests which change signature generating fields.
+ (Joel Nothman, yonik, shalin)
+
================== 4.0.0 ==================
Versions of Major Components
@@ -862,7 +888,7 @@ Bug Fixes
* SOLR-3527: SolrCmdDistributor drops some of the important commit attributes
(maxOptimizeSegments, softCommit, expungeDeletes) when sending a commit to
- replicas. (Andy Laird, Tomas Fernandez Lobbe, Mark Miller)
+ replicas. (Andy Laird, Tomás Fernández Löbbe, Mark Miller)
* SOLR-3844: SolrCore reload can fail because it tries to remove the index
write lock while already holding it. (Mark Miller)
@@ -1273,7 +1299,7 @@ New Features
* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now
supports "percentages" which get evaluated relative the current size of
the cache when warming happens.
- (Tomas Fernandez Lobbe and hossman)
+ (Tomás Fernández Löbbe and hossman)
* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
norm, maxdoc, numdocs. (yonik)
@@ -1644,12 +1670,12 @@ Bug Fixes
down to it via acceptDocs since LUCENE-1536. (Mike Hugo, yonik)
* SOLR-3214: If you use multiple fl entries rather than a comma separated list, all but the first
- entry can be ignored if you are using distributed search. (Tomas Fernandez Lobbe via Mark Miller)
+ entry can be ignored if you are using distributed search. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-3352: eDismax: pf2 should kick in for a query with 2 terms (janhoy)
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
- (James Dyer, Tomas Fernandez Lobbe)
+ (James Dyer, Tomás Fernández Löbbe)
* SOLR-2605: fixed tracking of the 'defaultCoreName' in CoreContainer so that
CoreAdminHandler could return consistent information regardless of wether
@@ -1864,7 +1890,17 @@ Documentation
* SOLR-2232: Improved README info on solr.solr.home in examples
(Eric Pugh and hossman)
-
+
+================== 3.6.2 ==================
+
+Bug Fixes
+----------------------
+* SOLR-3790: ConcurrentModificationException could be thrown when using hl.fl=*.
+ (yonik, koji)
+
+* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
+ (Tom Burton-West, Robert Muir)
+
================== 3.6.1 ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@@ -1877,7 +1913,7 @@ Bug Fixes
(Uwe Schindler, Mike McCandless, Robert Muir)
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
- (James Dyer, Tomas Fernandez Lobbe)
+ (James Dyer, Tomás Fernández Löbbe)
* SOLR-3375: Fix charset problems with HttpSolrServer (Roger Håkansson, yonik, siren)
Modified: lucene/dev/branches/lucene4547/solr/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/README.txt?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/README.txt (original)
+++ lucene/dev/branches/lucene4547/solr/README.txt Mon Jan 14 18:54:22 2013
@@ -45,11 +45,11 @@ example/
Please see example/README.txt for information about running this
example.
-dist/apache-solr-XX.war
+dist/solr-XX.war
The Apache Solr Application. Deploy this WAR file to any servlet
container to run Apache Solr.
-dist/apache-solr-<component>-XX.jar
+dist/solr-<component>-XX.jar
The Apache Solr libraries. To compile Apache Solr Plugins,
one or more of these will be required. The core library is
required at a minimum. (see http://wiki.apache.org/solr/SolrPlugins
Modified: lucene/dev/branches/lucene4547/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/common-build.xml?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/common-build.xml (original)
+++ lucene/dev/branches/lucene4547/solr/common-build.xml Mon Jan 14 18:54:22 2013
@@ -25,7 +25,7 @@
<property name="Name" value="Solr" />
<property name="version" value="5.0-SNAPSHOT"/>
- <property name="fullname" value="apache-${ant.project.name}"/>
+ <property name="fullname" value="${ant.project.name}"/>
<property name="fullnamever" value="${fullname}-${version}"/>
<property name="final.name" value="${fullnamever}"/>
@@ -114,7 +114,7 @@
<attribute name="property" default="@{name}.uptodate"/>
<attribute name="classpath.property" default="@{name}.jar"/>
<!-- set jarfile only, if the target jar file has no generic name -->
- <attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/apache-solr-@{name}-${version}.jar"/>
+ <attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
<sequential>
<!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
<property name="@{classpath.property}" location="@{jarfile}"/>
@@ -214,13 +214,13 @@
</target>
<target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
- <uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/apache-solr-core-${version}-javadoc.jar">
+ <uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
</uptodate>
</target>
<target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
- <uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/apache-solr-solrj-${version}-javadoc.jar">
+ <uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
</uptodate>
</target>
Modified: lucene/dev/branches/lucene4547/solr/contrib/uima/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/contrib/uima/README.txt?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/contrib/uima/README.txt (original)
+++ lucene/dev/branches/lucene4547/solr/contrib/uima/README.txt Mon Jan 14 18:54:22 2013
@@ -19,7 +19,7 @@ To start using Solr UIMA Metadata Extrac
<lib dir="../../contrib/uima/lib" />
<lib dir="../../contrib/uima/lucene-libs" />
- <lib dir="../../dist/" regex="apache-solr-uima-\d.*\.jar" />
+ <lib dir="../../dist/" regex="solr-uima-\d.*\.jar" />
2. modify your schema.xml adding the fields you want to be hold metadata specifying proper values for type, indexed, stored and multiValued options:
Modified: lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml Mon Jan 14 18:54:22 2013
@@ -44,8 +44,8 @@
in that directory which completely match the regex (anchored on both
ends) will be included.
-->
- <lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
- <lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
+ <lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
+ <lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
<!--
If a dir option (with or without a regex) is used and nothing is
found that matches, it will be ignored