You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/14 14:33:37 UTC
svn commit: r1372858 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/codecs/
core/src/java/org/apache/lucene/codecs/lucene40/
core/src/java/org/apache/lucene/index/
core/src/test/org/apache/lucene/index/ facet/src/examples/org/apache/...
Author: rmuir
Date: Tue Aug 14 12:33:36 2012
New Revision: 1372858
URL: http://svn.apache.org/viewvc?rev=1372858&view=rev
Log:
LUCENE-4304: remove PayloadProcessorProvider, map facet ords using a FilterAtomicReader
Added:
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java (with props)
Removed:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/PayloadProcessorProvider.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java
lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Aug 14 12:33:36 2012
@@ -35,6 +35,13 @@ API Changes
the leaf atomic reader contexts for all readers in the tree.
(Uwe Schindler, Robert Muir)
+* LUCENE-4304: removed PayloadProcessorProvider. If you want to change
+ payloads (or other things) when merging indexes, it's recommended
+ to just use a FilterAtomicReader + IndexWriter.addIndexes. See the
+ OrdinalMappingAtomicReader and TaxonomyMergeUtils in the facets
+ module if you want an example of this.
+ (Mike McCandless, Uwe Schindler, Shai Erera, Robert Muir)
+
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java Tue Aug 14 12:33:36 2012
@@ -123,18 +123,7 @@ public final class MappingMultiDocsAndPo
@Override
public BytesRef getPayload() throws IOException {
- BytesRef payload = current.getPayload();
- if (mergeState.currentPayloadProcessor[upto] != null && payload != null) {
- // to not violate the D&P api, we must give the processor a private copy
- // TODO: reuse a BytesRef if there is a PPP
- payload = BytesRef.deepCopyOf(payload);
- mergeState.currentPayloadProcessor[upto].processPayload(payload);
- if (payload.length == 0) {
- // don't let PayloadProcessors corrumpt the index
- return null;
- }
- }
- return payload;
+ return current.getPayload();
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Tue Aug 14 12:33:36 2012
@@ -27,8 +27,6 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
-import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
@@ -170,12 +168,7 @@ public abstract class TermVectorsWriter
final AtomicReader reader = mergeState.readers.get(i);
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
- // set PayloadProcessor
- if (mergeState.payloadProcessorProvider != null) {
- mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
- } else {
- mergeState.currentReaderPayloadProcessor = null;
- }
+
for (int docID = 0; docID < maxDoc; docID++) {
if (liveDocs != null && !liveDocs.get(docID)) {
// skip deleted docs
@@ -215,9 +208,6 @@ public abstract class TermVectorsWriter
TermsEnum termsEnum = null;
DocsAndPositionsEnum docsAndPositionsEnum = null;
- final ReaderPayloadProcessor readerPayloadProcessor = mergeState.currentReaderPayloadProcessor;
- PayloadProcessor payloadProcessor = null;
-
for(String fieldName : vectors) {
final FieldInfo fieldInfo = mergeState.fieldInfos.fieldInfo(fieldName);
@@ -250,10 +240,6 @@ public abstract class TermVectorsWriter
final int freq = (int) termsEnum.totalTermFreq();
startTerm(termsEnum.term(), freq);
-
- if (hasPayloads && readerPayloadProcessor != null) {
- payloadProcessor = readerPayloadProcessor.getProcessor(fieldName, termsEnum.term());
- }
if (hasPositions || hasOffsets) {
docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
@@ -268,17 +254,7 @@ public abstract class TermVectorsWriter
final int startOffset = docsAndPositionsEnum.startOffset();
final int endOffset = docsAndPositionsEnum.endOffset();
- BytesRef payload = docsAndPositionsEnum.getPayload();
-
- if (payloadProcessor != null && payload != null) {
- // to not violate the D&P api, we must give the processor a private copy
- payload = BytesRef.deepCopyOf(payload);
- payloadProcessor.processPayload(payload);
- if (payload.length == 0) {
- // don't let PayloadProcessors corrumpt the index
- payload = null;
- }
- }
+ final BytesRef payload = docsAndPositionsEnum.getPayload();
assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset, payload);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java Tue Aug 14 12:33:36 2012
@@ -154,14 +154,7 @@ public abstract class TermsConsumer {
postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
- // set PayloadProcessor
- if (mergeState.payloadProcessorProvider != null) {
- for (int i = 0; i < mergeState.readers.size(); i++) {
- if (mergeState.readerPayloadProcessor[i] != null) {
- mergeState.currentPayloadProcessor[i] = mergeState.readerPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
- }
- }
- }
+
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
if (stats.docFreq > 0) {
@@ -188,14 +181,7 @@ public abstract class TermsConsumer {
postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
- // set PayloadProcessor
- if (mergeState.payloadProcessorProvider != null) {
- for (int i = 0; i < mergeState.readers.size(); i++) {
- if (mergeState.readerPayloadProcessor[i] != null) {
- mergeState.currentPayloadProcessor[i] = mergeState.readerPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
- }
- }
- }
+
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
if (stats.docFreq > 0) {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java Tue Aug 14 12:33:36 2012
@@ -315,12 +315,7 @@ public final class Lucene40TermVectorsWr
int numDocs = 0;
for (int i = 0; i < mergeState.readers.size(); i++) {
final AtomicReader reader = mergeState.readers.get(i);
- // set PayloadProcessor
- if (mergeState.payloadProcessorProvider != null) {
- mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
- } else {
- mergeState.currentReaderPayloadProcessor = null;
- }
+
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
Lucene40TermVectorsReader matchingVectorsReader = null;
if (matchingSegmentReader != null) {
@@ -353,8 +348,8 @@ public final class Lucene40TermVectorsWr
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
int totalNumDocs = 0;
- if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
- // We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
+ if (matchingVectorsReader != null) {
+ // We can bulk-copy because the fieldInfos are "congruent"
for (int docNum = 0; docNum < maxDoc;) {
if (!liveDocs.get(docNum)) {
// skip deleted docs
@@ -404,8 +399,8 @@ public final class Lucene40TermVectorsWr
int rawDocLengths2[])
throws IOException {
final int maxDoc = reader.maxDoc();
- if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
- // We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
+ if (matchingVectorsReader != null) {
+ // We can bulk-copy because the fieldInfos are "congruent"
int docCount = 0;
while (docCount < maxDoc) {
int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Tue Aug 14 12:33:36 2012
@@ -260,9 +260,6 @@ public class IndexWriter implements Clos
// to allow users to query an IndexWriter settings.
private final LiveIndexWriterConfig config;
- // The PayloadProcessorProvider to use when segments are merged
- private PayloadProcessorProvider payloadProcessorProvider;
-
DirectoryReader getReader() throws IOException {
return getReader(true);
}
@@ -2406,8 +2403,7 @@ public class IndexWriter implements Clos
false, codec, null, null);
SegmentMerger merger = new SegmentMerger(info, infoStream, trackingDir, config.getTermIndexInterval(),
- MergeState.CheckAbort.NONE, payloadProcessorProvider,
- globalFieldNumberMap, context);
+ MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
for (IndexReader reader : readers) { // add new indexes
merger.add(reader);
@@ -3510,7 +3506,7 @@ public class IndexWriter implements Clos
final TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);
SegmentMerger merger = new SegmentMerger(merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(), checkAbort,
- payloadProcessorProvider, globalFieldNumberMap, context);
+ globalFieldNumberMap, context);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merging " + segString(merge.segments));
@@ -4065,38 +4061,6 @@ public class IndexWriter implements Clos
synchronized void deletePendingFiles() throws IOException {
deleter.deletePendingFiles();
}
-
- /**
- * Sets the {@link PayloadProcessorProvider} to use when merging payloads.
- * Note that the given <code>pcp</code> will be invoked for every segment that
- * is merged, not only external ones that are given through
- * {@link #addIndexes}. If you want only the payloads of the external segments
- * to be processed, you can return <code>null</code> whenever a
- * {@link PayloadProcessorProvider.ReaderPayloadProcessor} is requested for the {@link Directory} of the
- * {@link IndexWriter}.
- * <p>
- * The default is <code>null</code> which means payloads are processed
- * normally (copied) during segment merges. You can also unset it by passing
- * <code>null</code>.
- * <p>
- * <b>NOTE:</b> the set {@link PayloadProcessorProvider} will be in effect
- * immediately, potentially for already running merges too. If you want to be
- * sure it is used for further operations only, such as {@link #addIndexes} or
- * {@link #forceMerge}, you can call {@link #waitForMerges()} before.
- */
- public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
- ensureOpen();
- payloadProcessorProvider = pcp;
- }
-
- /**
- * Returns the {@link PayloadProcessorProvider} that is used during segment
- * merges to process payloads.
- */
- public PayloadProcessorProvider getPayloadProcessorProvider() {
- ensureOpen();
- return payloadProcessorProvider;
- }
/**
* NOTE: this method creates a compound file for all files returned by
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java Tue Aug 14 12:33:36 2012
@@ -19,8 +19,6 @@ package org.apache.lucene.index;
import java.util.List;
-import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
-import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
@@ -194,14 +192,6 @@ public class MergeState {
// Updated per field;
public FieldInfo fieldInfo;
- // Used to process payloads
- // TODO: this is a FactoryFactory here basically
- // and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
- public PayloadProcessorProvider payloadProcessorProvider;
- public ReaderPayloadProcessor[] readerPayloadProcessor;
- public ReaderPayloadProcessor currentReaderPayloadProcessor;
- public PayloadProcessor[] currentPayloadProcessor;
-
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
// but is this really so expensive to compute again in different components, versus once in SM?
public SegmentReader[] matchingSegmentReaders;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Tue Aug 14 12:33:36 2012
@@ -56,13 +56,11 @@ final class SegmentMerger {
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
SegmentMerger(SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval,
- MergeState.CheckAbort checkAbort, PayloadProcessorProvider payloadProcessorProvider,
- FieldInfos.FieldNumbers fieldNumbers, IOContext context) {
+ MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) {
mergeState.segmentInfo = segmentInfo;
mergeState.infoStream = infoStream;
mergeState.readers = new ArrayList<AtomicReader>();
mergeState.checkAbort = checkAbort;
- mergeState.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
this.termIndexInterval = termIndexInterval;
this.codec = segmentInfo.getCodec();
@@ -274,8 +272,6 @@ final class SegmentMerger {
// Remap docIDs
mergeState.docMaps = new MergeState.DocMap[numReaders];
mergeState.docBase = new int[numReaders];
- mergeState.readerPayloadProcessor = new PayloadProcessorProvider.ReaderPayloadProcessor[numReaders];
- mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
int docBase = 0;
@@ -289,10 +285,6 @@ final class SegmentMerger {
mergeState.docMaps[i] = docMap;
docBase += docMap.numDocs();
- if (mergeState.payloadProcessorProvider != null) {
- mergeState.readerPayloadProcessor[i] = mergeState.payloadProcessorProvider.getReaderProcessor(reader);
- }
-
i++;
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDoc.java Tue Aug 14 12:33:36 2012
@@ -204,7 +204,7 @@ public class TestDoc extends LuceneTestC
final SegmentInfo si = new SegmentInfo(si1.info.dir, Constants.LUCENE_MAIN_VERSION, merged, -1, false, codec, null, null);
SegmentMerger merger = new SegmentMerger(si, InfoStream.getDefault(), trackingDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
- MergeState.CheckAbort.NONE, null, new FieldInfos.FieldNumbers(), context);
+ MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), context);
merger.add(r1);
merger.add(r2);
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java Tue Aug 14 12:33:36 2012
@@ -82,7 +82,7 @@ public class TestSegmentMerger extends L
final SegmentInfo si = new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, -1, false, codec, null, null);
SegmentMerger merger = new SegmentMerger(si, InfoStream.getDefault(), mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
- MergeState.CheckAbort.NONE, null, new FieldInfos.FieldNumbers(), newIOContext(random()));
+ MergeState.CheckAbort.NONE, new FieldInfos.FieldNumbers(), newIOContext(random()));
merger.add(reader1);
merger.add(reader2);
MergeState mergeState = merger.merge();
Modified: lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java?rev=1372858&r1=1372857&r2=1372858&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java (original)
+++ lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java Tue Aug 14 12:33:36 2012
@@ -1,17 +1,20 @@
package org.apache.lucene.facet.example.merge;
import java.io.IOException;
+import java.util.List;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.PayloadProcessorProvider;
+import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.facet.example.ExampleUtils;
-import org.apache.lucene.facet.index.FacetsPayloadProcessorProvider;
+import org.apache.lucene.facet.index.OrdinalMappingAtomicReader;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
@@ -84,13 +87,17 @@ public class TaxonomyMergeUtils {
// merge the taxonomies
destTaxWriter.addTaxonomy(srcTaxDir, map);
- PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
- srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());
- destIndexWriter.setPayloadProcessorProvider(payloadProcessor);
+ int ordinalMap[] = map.getMap();
+ FacetIndexingParams params = new DefaultFacetIndexingParams();
- IndexReader reader = DirectoryReader.open(srcIndexDir);
+ DirectoryReader reader = DirectoryReader.open(srcIndexDir, -1);
+ List<AtomicReaderContext> leaves = reader.leaves();
+ AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()];
+ for (int i = 0; i < leaves.size(); i++) {
+ wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, params);
+ }
try {
- destIndexWriter.addIndexes(reader);
+ destIndexWriter.addIndexes(new MultiReader(wrappedLeaves));
// commit changes to taxonomy and index respectively.
destTaxWriter.commit();
Added: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java?rev=1372858&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java (added)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java Tue Aug 14 12:33:36 2012
@@ -0,0 +1,224 @@
+package org.apache.lucene.facet.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterAtomicReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.encoding.IntDecoder;
+import org.apache.lucene.util.encoding.IntEncoder;
+
+/**
+ * A {@link FilterAtomicReader} for updating facets ordinal references,
+ * based on an ordinal map. You should use this code in conjunction with merging
+ * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
+ * which maps the 'old' payloads to the 'new' ones. You can use that map to
+ * re-map the payloads which contain the facets information (ordinals) either
+ * before or while merging the indexes.
+ * <p>
+ * For re-mapping the ordinals during index merge, do the following:
+ *
+ * <pre class="prettyprint">
+ * // merge the old taxonomy with the new one.
+ * OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
+ * int[] ordmap = map.getMap();
+ *
+ * // Add the index and re-map ordinals on the go
+ * DirectoryReader reader = DirectoryReader.open(oldDir);
+ * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
+ * IndexWriter writer = new IndexWriter(newDir, conf);
+ * List<AtomicReaderContext> leaves = reader.leaves();
+ * AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()];
+ * for (int i = 0; i < leaves.size(); i++) {
+ * wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordmap);
+ * }
+ * writer.addIndexes(new MultiReader(wrappedLeaves));
+ * writer.commit();
+ * </pre>
+ *
+ * @lucene.experimental
+ */
+public class OrdinalMappingAtomicReader extends FilterAtomicReader {
+ private final int[] ordinalMap;
+ // a little obtuse: but we dont need to create Term objects this way
+ private final Map<String,Map<BytesRef,CategoryListParams>> termMap =
+ new HashMap<String,Map<BytesRef,CategoryListParams>>(1);
+
+ /**
+ * Wraps an AtomicReader, mapping ordinals according to the ordinalMap.
+ * Calls {@link #OrdinalMappingAtomicReader(AtomicReader, int[], FacetIndexingParams)
+ * OrdinalMappingAtomicReader(in, ordinalMap, new DefaultFacetIndexingParams())}
+ */
+ public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap) {
+ this(in, ordinalMap, new DefaultFacetIndexingParams());
+ }
+
+ /**
+ * Wraps an AtomicReader, mapping ordinals according to the ordinalMap,
+ * using the provided indexingParams.
+ */
+ public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap, FacetIndexingParams indexingParams) {
+ super(in);
+ this.ordinalMap = ordinalMap;
+ for (CategoryListParams params: indexingParams.getAllCategoryListParams()) {
+ Term term = params.getTerm();
+ Map<BytesRef,CategoryListParams> fieldMap = termMap.get(term.field());
+ if (fieldMap == null) {
+ fieldMap = new HashMap<BytesRef,CategoryListParams>(1);
+ termMap.put(term.field(), fieldMap);
+ }
+ fieldMap.put(term.bytes(), params);
+ }
+ }
+
+ @Override
+ public Fields getTermVectors(int docID) throws IOException {
+ Fields fields = super.getTermVectors(docID);
+ if (fields == null) {
+ return null;
+ } else {
+ return new OrdinalMappingFields(fields);
+ }
+ }
+
+ @Override
+ public Fields fields() throws IOException {
+ Fields fields = super.fields();
+ if (fields == null) {
+ return null;
+ } else {
+ return new OrdinalMappingFields(fields);
+ }
+ }
+
+ private class OrdinalMappingFields extends FilterFields {
+
+ public OrdinalMappingFields(Fields in) {
+ super(in);
+ }
+
+ @Override
+ public Terms terms(String field) throws IOException {
+ Terms terms = super.terms(field);
+ if (terms == null) {
+ return terms;
+ }
+ Map<BytesRef,CategoryListParams> termsMap = termMap.get(field);
+ if (termsMap == null) {
+ return terms;
+ } else {
+ return new OrdinalMappingTerms(terms, termsMap);
+ }
+ }
+ }
+
+ private class OrdinalMappingTerms extends FilterTerms {
+ private final Map<BytesRef,CategoryListParams> termsMap;
+
+ public OrdinalMappingTerms(Terms in, Map<BytesRef,CategoryListParams> termsMap) {
+ super(in);
+ this.termsMap = termsMap;
+ }
+
+ @Override
+ public TermsEnum iterator(TermsEnum reuse) throws IOException {
+ // TODO: should we reuse the inner termsenum?
+ return new OrdinalMappingTermsEnum(super.iterator(reuse), termsMap);
+ }
+ }
+
+ private class OrdinalMappingTermsEnum extends FilterTermsEnum {
+ private final Map<BytesRef,CategoryListParams> termsMap;
+
+ public OrdinalMappingTermsEnum(TermsEnum in, Map<BytesRef,CategoryListParams> termsMap) {
+ super(in);
+ this.termsMap = termsMap;
+ }
+
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ // TODO: we could reuse our D&P enum if we need
+ DocsAndPositionsEnum inner = super.docsAndPositions(liveDocs, reuse, flags);
+ if (inner == null) {
+ return inner;
+ }
+
+ CategoryListParams params = termsMap.get(term());
+ if (params == null) {
+ return inner;
+ }
+
+ return new OrdinalMappingDocsAndPositionsEnum(inner, params);
+ }
+ }
+
+ private class OrdinalMappingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
+ private final IntEncoder encoder;
+ private final IntDecoder decoder;
+ private final ByteArrayOutputStream os = new ByteArrayOutputStream();
+ private final BytesRef payloadOut = new BytesRef();
+
+ public OrdinalMappingDocsAndPositionsEnum(DocsAndPositionsEnum in, CategoryListParams params) {
+ super(in);
+ encoder = params.createEncoder();
+ decoder = encoder.createMatchingDecoder();
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ BytesRef payload = super.getPayload();
+ if (payload == null) {
+ return payload;
+ } else {
+ InputStream is = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
+ decoder.reInit(is);
+ os.reset();
+ encoder.reInit(os);
+ long ordinal;
+ while ((ordinal = decoder.decode()) != IntDecoder.EOS) {
+ int newOrdinal = ordinalMap[(int)ordinal];
+ encoder.encode(newOrdinal);
+ }
+ encoder.close();
+ // TODO (Facet): avoid copy?
+ byte out[] = os.toByteArray();
+ payloadOut.bytes = out;
+ payloadOut.offset = 0;
+ payloadOut.length = out.length;
+ return payloadOut;
+ }
+ }
+ }
+}