You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/03 01:05:29 UTC
svn commit: r1368749 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/test/org/apache/lucene/index/
Author: mikemccand
Date: Thu Aug 2 23:05:29 2012
New Revision: 1368749
URL: http://svn.apache.org/viewvc?rev=1368749&view=rev
Log:
LUCENE-4203: add IndexWriter.tryDeleteDocument
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Aug 2 23:05:29 2012
@@ -42,6 +42,11 @@ New features
implementations to optimize the enum implementation. (Robert Muir,
Mike McCandless)
+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(IndexReader reader,
+ int docID), to attempt deletion by docID as long as the provided
+ reader is an NRT reader, and the segment has not yet been merged
+ away (Mike McCandless).
+
API Changes
* LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java Thu Aug 2 23:05:29 2012
@@ -1,8 +1,5 @@
package org.apache.lucene.index;
-import java.util.Collections;
-import java.util.List;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import java.util.List;
* limitations under the License.
*/
+import java.util.Collections;
+import java.util.List;
+
/**
* {@link IndexReaderContext} for {@link AtomicReader} instances
* @lucene.experimental
@@ -51,8 +51,9 @@ public final class AtomicReaderContext e
@Override
public List<AtomicReaderContext> leaves() {
- if (!isTopLevel)
+ if (!isTopLevel) {
throw new UnsupportedOperationException("This is not a top-level context.");
+ }
assert leaves != null;
return leaves;
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java Thu Aug 2 23:05:29 2012
@@ -1,7 +1,5 @@
package org.apache.lucene.index;
-import java.util.List;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import java.util.List;
* limitations under the License.
*/
+import java.util.List;
+
/**
* A struct like class that represents a hierarchical relationship between
* {@link IndexReader} instances.
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Thu Aug 2 23:05:29 2012
@@ -1243,6 +1243,78 @@ public class IndexWriter implements Clos
}
}
+ /** Expert: attempts to delete by document ID, as long as
+ * the provided reader is a near-real-time reader (from {@link
+ * DirectoryReader#open(IndexWriter,boolean)}). If the
+ * provided reader is an NRT reader obtained from this
+ * writer, and its segment has not been merged away, then
+ * the delete succeeds and this method returns true; else, it
+ * returns false and the caller must then separately delete by
+ * Term or Query.
+ *
+ * <b>NOTE</b>: this method can only delete documents
+ * visible to the currently open NRT reader. If you need
+ * to delete documents indexed after opening the NRT
+ * reader you must use the other deleteDocuments methods
+ * (e.g., {@link #deleteDocuments(Term)}). */
+ public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
+
+ final AtomicReader reader;
+ if (readerIn instanceof AtomicReader) {
+ // Reader is already atomic: use the incoming docID:
+ reader = (AtomicReader) readerIn;
+ } else {
+ // Composite reader: lookup sub-reader and re-base docID:
+ List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
+ int subIndex = ReaderUtil.subIndex(docID, leaves);
+ reader = leaves.get(subIndex).reader();
+ docID -= leaves.get(subIndex).docBase;
+ assert docID >= 0;
+ assert docID < reader.maxDoc();
+ }
+
+ if (!(reader instanceof SegmentReader)) {
+ throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
+ }
+
+ final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
+
+ // TODO: this is a slow linear search, but, number of
+ // segments should be contained unless something is
+ // seriously wrong w/ the index, so it should be a minor
+ // cost:
+
+ if (segmentInfos.indexOf(info) != -1) {
+ ReadersAndLiveDocs rld = readerPool.get(info, false);
+ if (rld != null) {
+ synchronized(bufferedDeletesStream) {
+ rld.initWritableLiveDocs();
+ if (rld.delete(docID)) {
+ final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
+ if (fullDelCount == rld.info.info.getDocCount()) {
+ // If a merge has already registered for this
+ // segment, we leave it in the readerPool; the
+ // merge will skip merging it and will then drop
+ // it once it's done:
+ if (!mergingSegments.contains(rld.info)) {
+ segmentInfos.remove(rld.info);
+ readerPool.drop(rld.info);
+ checkpoint();
+ }
+ }
+ }
+ //System.out.println(" yes " + info.info.name + " " + docID);
+ return true;
+ }
+ } else {
+ //System.out.println(" no rld " + info.info.name + " " + docID);
+ }
+ } else {
+ //System.out.println(" no seg " + info.info.name + " " + docID);
+ }
+ return false;
+ }
+
/**
* Deletes the document(s) containing any of the
* terms. All given deletes are applied and flushed atomically
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java Thu Aug 2 23:05:29 2012
@@ -27,6 +27,7 @@ import java.util.concurrent.locks.Reentr
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.SegmentInfoPerCommit;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
@@ -254,6 +255,14 @@ public class NRTManager extends Referenc
long getAndIncrementGeneration() {
return indexingGen.getAndIncrement();
}
+
+ public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
+ if (writer.tryDeleteDocument(reader, docID)) {
+ return indexingGen.get();
+ } else {
+ return -1;
+ }
+ }
}
/**
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java Thu Aug 2 23:05:29 2012
@@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.document.*;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.junit.Test;
@@ -48,10 +51,13 @@ public class TestRollingUpdates extends
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;
+ IndexSearcher s = null;
final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
if (VERBOSE) {
System.out.println("TEST: numUpdates=" + numUpdates);
}
+ int updateCount = 0;
+ // TODO: sometimes update ids not in order...
for(int docIter=0;docIter<numUpdates;docIter++) {
final Document doc = docs.nextDoc();
final String myID = ""+id;
@@ -60,16 +66,59 @@ public class TestRollingUpdates extends
} else {
id++;
}
+ if (VERBOSE) {
+ System.out.println(" docIter=" + docIter + " id=" + id);
+ }
((Field) doc.getField("docid")).setStringValue(myID);
- w.updateDocument(new Term("docid", myID), doc);
+
+ Term idTerm = new Term("docid", myID);
+
+ final boolean doUpdate;
+ if (s != null && updateCount < SIZE) {
+ TopDocs hits = s.search(new TermQuery(idTerm), 1);
+ assertEquals(1, hits.totalHits);
+ doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
+ if (VERBOSE) {
+ if (doUpdate) {
+ System.out.println(" tryDeleteDocument failed");
+ } else {
+ System.out.println(" tryDeleteDocument succeeded");
+ }
+ }
+ } else {
+ doUpdate = true;
+ if (VERBOSE) {
+ System.out.println(" no searcher: doUpdate=true");
+ }
+ }
+
+ updateCount++;
+
+ if (doUpdate) {
+ w.updateDocument(idTerm, doc);
+ } else {
+ w.addDocument(doc);
+ }
if (docIter >= SIZE && random().nextInt(50) == 17) {
if (r != null) {
r.close();
}
+
final boolean applyDeletions = random().nextBoolean();
+
+ if (VERBOSE) {
+ System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
+ }
+
r = w.getReader(applyDeletions);
+ if (applyDeletions) {
+ s = new IndexSearcher(r);
+ } else {
+ s = null;
+ }
assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
+ updateCount = 0;
}
}