You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2006/08/31 21:22:45 UTC
svn commit: r439019 - in
/jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene:
ConsistencyCheck.java ConsistencyCheckError.java MultiIndex.java
SearchIndex.java
Author: mreutegg
Date: Thu Aug 31 12:22:45 2006
New Revision: 439019
URL: http://svn.apache.org/viewvc?rev=439019&view=rev
Log:
JCR-554: Index creates many folders when re-indexing
- delete unused index segments earlier
- use UUID instances instead of String representation of a UUID
Modified:
jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java
jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
Modified: jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java?rev=439019&r1=439018&r2=439019&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java (original)
+++ jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java Thu Aug 31 12:22:45 2006
@@ -20,8 +20,8 @@
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.uuid.UUID;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
import org.apache.lucene.document.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -161,8 +161,8 @@
continue;
}
Document d = reader.document(i);
- String uuid = d.get(FieldNames.UUID);
- if (stateMgr.hasItemState(NodeId.valueOf(uuid))) {
+ UUID uuid = UUID.fromString(d.get(FieldNames.UUID));
+ if (stateMgr.hasItemState(new NodeId(uuid))) {
Document old = (Document) documents.put(uuid, d);
if (old != null) {
multipleEntries.add(uuid);
@@ -177,19 +177,23 @@
// create multiple entries errors
for (Iterator it = multipleEntries.iterator(); it.hasNext();) {
- errors.add(new MultipleEntries((String) it.next()));
+ errors.add(new MultipleEntries((UUID) it.next()));
}
// run through documents
for (Iterator it = documents.values().iterator(); it.hasNext();) {
Document d = (Document) it.next();
- String uuid = d.get(FieldNames.UUID);
- String parentUUID = d.get(FieldNames.PARENT);
- if (documents.containsKey(parentUUID) || parentUUID.length() == 0) {
+ UUID uuid = UUID.fromString(d.get(FieldNames.UUID));
+ String parentUUIDString = d.get(FieldNames.PARENT);
+ UUID parentUUID = null;
+ if (parentUUIDString.length() > 0) {
+ parentUUID = UUID.fromString(parentUUIDString);
+ }
+ if (parentUUID == null || documents.containsKey(parentUUID)) {
continue;
}
// parent is missing
- NodeId parentId = NodeId.valueOf(parentUUID);
+ NodeId parentId = new NodeId(parentUUID);
if (stateMgr.hasItemState(parentId)) {
errors.add(new MissingAncestor(uuid, parentUUID));
} else {
@@ -241,9 +245,9 @@
*/
private class MissingAncestor extends ConsistencyCheckError {
- private final String parentUUID;
+ private final UUID parentUUID;
- private MissingAncestor(String uuid, String parentUUID) {
+ private MissingAncestor(UUID uuid, UUID parentUUID) {
super("Parent of " + uuid + " missing in index. Parent: " + parentUUID, uuid);
this.parentUUID = parentUUID;
}
@@ -261,15 +265,15 @@
* @throws IOException if an error occurs while repairing.
*/
public void repair() throws IOException {
- String pUUID = parentUUID;
- while (pUUID != null && !documents.containsKey(pUUID)) {
+ NodeId parentId = new NodeId(parentUUID);
+ while (parentId != null && !documents.containsKey(parentId.getUUID())) {
try {
- NodeState n = (NodeState) stateMgr.getItemState(NodeId.valueOf(pUUID));
+ NodeState n = (NodeState) stateMgr.getItemState(parentId);
log.info("Reparing missing node " + getPath(n));
Document d = index.createDocument(n);
index.addDocument(d);
- documents.put(n.getNodeId().toString(), d);
- pUUID = n.getParentId().toString();
+ documents.put(n.getNodeId().getUUID(), d);
+ parentId = n.getParentId();
} catch (ItemStateException e) {
throw new IOException(e.toString());
} catch (RepositoryException e) {
@@ -284,7 +288,7 @@
*/
private class UnknownParent extends ConsistencyCheckError {
- private UnknownParent(String uuid, String parentUUID) {
+ private UnknownParent(UUID uuid, UUID parentUUID) {
super("Node " + uuid + " has unknown parent: " + parentUUID, uuid);
}
@@ -309,7 +313,7 @@
*/
private class MultipleEntries extends ConsistencyCheckError {
- MultipleEntries(String uuid) {
+ MultipleEntries(UUID uuid) {
super("Multiple entries found for node " + uuid, uuid);
}
@@ -328,15 +332,14 @@
*/
public void repair() throws IOException {
// first remove all occurrences
- Term id = new Term(FieldNames.UUID, uuid);
- index.removeAllDocuments(id);
+ index.removeAllDocuments(uuid);
// then re-index the node
try {
- NodeState node = (NodeState) stateMgr.getItemState(NodeId.valueOf(uuid));
+ NodeState node = (NodeState) stateMgr.getItemState(new NodeId(uuid));
log.info("Re-indexing duplicate node occurrences in index: " + getPath(node));
Document d = index.createDocument(node);
index.addDocument(d);
- documents.put(node.getNodeId().toString(), d);
+ documents.put(node.getNodeId().getUUID(), d);
} catch (ItemStateException e) {
throw new IOException(e.toString());
} catch (RepositoryException e) {
@@ -350,7 +353,7 @@
*/
private class NodeDeleted extends ConsistencyCheckError {
- NodeDeleted(String uuid) {
+ NodeDeleted(UUID uuid) {
super("Node " + uuid + " does not longer exist.", uuid);
}
@@ -368,7 +371,7 @@
*/
public void repair() throws IOException {
log.info("Removing deleted node from index: " + uuid);
- index.removeDocument(new Term(FieldNames.UUID, uuid));
+ index.removeDocument(uuid);
}
}
}
Modified: jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java?rev=439019&r1=439018&r2=439019&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java (original)
+++ jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java Thu Aug 31 12:22:45 2006
@@ -16,6 +16,8 @@
*/
package org.apache.jackrabbit.core.query.lucene;
+import org.apache.jackrabbit.uuid.UUID;
+
import java.io.IOException;
/**
@@ -31,9 +33,9 @@
/**
* The UUID of the affected node.
*/
- protected final String uuid;
+ protected final UUID uuid;
- ConsistencyCheckError(String message, String uuid) {
+ ConsistencyCheckError(String message, UUID uuid) {
this.message = message;
this.uuid = uuid;
}
Modified: jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java?rev=439019&r1=439018&r2=439019&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (original)
+++ jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java Thu Aug 31 12:22:45 2006
@@ -22,6 +22,7 @@
import org.apache.jackrabbit.core.state.NoSuchItemStateException;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.uuid.Constants;
+import org.apache.jackrabbit.uuid.UUID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.document.Document;
@@ -189,6 +190,11 @@
private long currentTransactionId = -1;
/**
+ * Flag indicating whether re-indexing is running.
+ */
+ private boolean reindexing = false;
+
+ /**
* Creates a new MultiIndex.
*
* @param indexDir the base file system
@@ -267,11 +273,13 @@
// do an initial index if there are no indexes at all
if (indexNames.size() == 0) {
+ reindexing = true;
// traverse and index workspace
executeAndLog(new Start(Action.INTERNAL_TRANSACTION));
NodeState rootState = (NodeState) stateMgr.getItemState(rootId);
createIndex(rootState, stateMgr);
executeAndLog(new Commit(getTransactionId()));
+ reindexing = false;
}
} catch (ItemStateException e) {
throw new IOException("Error indexing root node: " + e.getMessage());
@@ -283,7 +291,7 @@
flushTask = new TimerTask() {
public void run() {
checkFlush();
- }
+ }
};
FLUSH_TIMER.schedule(flushTask, 0, 1000);
}
@@ -291,7 +299,7 @@
/**
* Atomically updates the index by removing some documents and adding others.
*
- * @param remove Iterator of <code>Term</code>s that identify documents to
+ * @param remove Iterator of <code>UUID</code>s that identify documents to
* remove
* @param add Iterator of <code>Document</code>s to add. Calls to
* <code>next()</code> on this iterator may return
@@ -308,8 +316,7 @@
boolean flush = false;
while (remove.hasNext()) {
- String uuid = ((Term) remove.next()).text();
- executeAndLog(new DeleteNode(transactionId, uuid));
+ executeAndLog(new DeleteNode(transactionId, (UUID) remove.next()));
}
while (add.hasNext()) {
Document doc = (Document) add.next();
@@ -350,33 +357,34 @@
}
/**
- * Deletes the first document that matches the <code>idTerm</code>.
+ * Deletes the first document that matches the <code>uuid</code>.
*
- * @param idTerm document that match this term will be deleted.
+ * @param uuid document that match this <code>uuid</code> will be deleted.
* @throws IOException if an error occurs while deleting the document.
*/
- void removeDocument(Term idTerm) throws IOException {
- List remove = Arrays.asList(new Term[]{idTerm});
+ void removeDocument(UUID uuid) throws IOException {
+ List remove = Arrays.asList(new UUID[]{uuid});
update(remove.iterator(), EmptyIterator.INSTANCE);
}
/**
- * Deletes all documents that match the <code>idTerm</code>.
+ * Deletes all documents that match the <code>uuid</code>.
*
- * @param idTerm documents that match this term will be deleted.
+ * @param uuid documents that match this <code>uuid</code> will be deleted.
* @return the number of deleted documents.
* @throws IOException if an error occurs while deleting documents.
*/
- synchronized int removeAllDocuments(Term idTerm) throws IOException {
+ synchronized int removeAllDocuments(UUID uuid) throws IOException {
synchronized (updateMonitor) {
updateInProgress = true;
}
int num;
try {
+ Term idTerm = new Term(FieldNames.UUID, uuid.toString());
executeAndLog(new Start(Action.INTERNAL_TRANSACTION));
num = volatileIndex.removeDocument(idTerm);
if (num > 0) {
- redoLog.append(new DeleteNode(getTransactionId(), idTerm.text()));
+ redoLog.append(new DeleteNode(getTransactionId(), uuid));
}
for (int i = 0; i < indexes.size(); i++) {
PersistentIndex index = (PersistentIndex) indexes.get(i);
@@ -384,7 +392,7 @@
if (indexNames.contains(index.getName())) {
int removed = index.removeDocument(idTerm);
if (removed > 0) {
- redoLog.append(new DeleteNode(getTransactionId(), idTerm.text()));
+ redoLog.append(new DeleteNode(getTransactionId(), uuid));
}
num += removed;
}
@@ -490,6 +498,26 @@
}
/**
+ * Returns <code>true</code> if this multi index has an index segment with
+ * the given name. This method even returns <code>true</code> if an index
+ * segments has not yet been loaded / initialized but exists on disk.
+ *
+ * @param indexName the name of the index segment.
+ * @return <code>true</code> if it exists; otherwise <code>false</code>.
+ */
+ synchronized boolean hasIndex(String indexName) {
+ // check existing
+ for (Iterator it = indexes.iterator(); it.hasNext();) {
+ PersistentIndex idx = (PersistentIndex) it.next();
+ if (idx.getName().equals(indexName)) {
+ return true;
+ }
+ }
+ // check if it exists on disk
+ return new File(indexDir, indexName).exists();
+ }
+
+ /**
* Replaces the indexes with names <code>obsoleteIndexes</code> with
* <code>index</code>. Documents that must be deleted in <code>index</code>
* can be identified with <code>Term</code>s in <code>deleted</code>.
@@ -510,7 +538,10 @@
updateInProgress = true;
}
try {
- executeAndLog(new Start(Action.INTERNAL_TRANSACTION));
+ // if we are reindexing there is already an active transaction
+ if (!reindexing) {
+ executeAndLog(new Start(Action.INTERNAL_TRANSACTION));
+ }
// delete obsolete indexes
Set names = new HashSet(Arrays.asList(obsoleteIndexes));
for (Iterator it = names.iterator(); it.hasNext();) {
@@ -521,6 +552,10 @@
}
}
+ // Index merger does not log an action when it creates the target
+ // index of the merge. We have to do this here.
+ executeAndLog(new CreateIndex(getTransactionId(), index.getName(), false));
+
executeAndLog(new AddIndex(getTransactionId(), index.getName()));
// delete documents in index
@@ -530,7 +565,14 @@
}
index.commit();
- executeAndLog(new Commit(getTransactionId()));
+ if (reindexing) {
+ // do some cleanup right away when reindexing
+ attemptDelete();
+ } else {
+ // only commit if we are not reindexing
+ // when reindexing the final commit is done at the very end
+ executeAndLog(new Commit(getTransactionId()));
+ }
} finally {
synchronized (updateMonitor) {
updateInProgress = false;
@@ -767,7 +809,11 @@
throws IOException {
a.execute(this);
redoLog.append(a);
- if (a.getType() == Action.TYPE_COMMIT) {
+ // please note that flushing the redo log is only required on
+ // commit, but we also want to keep track of new indexes for sure.
+ // otherwise it might happen that unused index folders are orphaned
+ // after a crash.
+ if (a.getType() == Action.TYPE_COMMIT || a.getType() == Action.TYPE_ADD_INDEX) {
redoLog.flush();
}
return a;
@@ -840,7 +886,7 @@
if (excludedIDs.contains(id)) {
return;
}
- executeAndLog(new AddNode(getTransactionId(), id.getUUID().toString()));
+ executeAndLog(new AddNode(getTransactionId(), id.getUUID()));
checkVolatileCommit();
List children = node.getChildNodeEntries();
for (Iterator it = children.iterator(); it.hasNext();) {
@@ -1242,7 +1288,7 @@
/**
* The uuid of the node to add.
*/
- private final String uuid;
+ private final UUID uuid;
/**
* The document to add to the index, or <code>null</code> if not available.
@@ -1255,7 +1301,7 @@
* @param transactionId the id of the transaction that executes this action.
* @param uuid the uuid of the node to add.
*/
- AddNode(long transactionId, String uuid) {
+ AddNode(long transactionId, UUID uuid) {
super(transactionId, Action.TYPE_ADD_NODE);
this.uuid = uuid;
}
@@ -1267,7 +1313,7 @@
* @param doc the document to add.
*/
AddNode(long transactionId, Document doc) {
- this(transactionId, doc.get(FieldNames.UUID));
+ this(transactionId, UUID.fromString(doc.get(FieldNames.UUID)));
this.doc = doc;
}
@@ -1288,7 +1334,7 @@
if (arguments.length() != Constants.UUID_FORMATTED_LENGTH) {
throw new IllegalArgumentException("arguments is not a uuid");
}
- return new AddNode(transactionId, arguments);
+ return new AddNode(transactionId, UUID.fromString(arguments));
}
/**
@@ -1299,7 +1345,7 @@
public void execute(MultiIndex index) throws IOException {
if (doc == null) {
try {
- doc = index.createDocument(NodeId.valueOf(uuid));
+ doc = index.createDocument(new NodeId(uuid));
} catch (RepositoryException e) {
// node does not exist anymore
log.debug(e.getMessage());
@@ -1429,8 +1475,10 @@
* @inheritDoc
*/
public void undo(MultiIndex index) throws IOException {
- PersistentIndex idx = index.getOrCreateIndex(indexName, false);
- index.deleteIndex(idx);
+ if (index.hasIndex(indexName)) {
+ PersistentIndex idx = index.getOrCreateIndex(indexName, false);
+ index.deleteIndex(idx);
+ }
}
/**
@@ -1539,7 +1587,7 @@
/**
* The uuid of the node to remove.
*/
- private final String uuid;
+ private final UUID uuid;
/**
* Creates a new DeleteNode action.
@@ -1547,7 +1595,7 @@
* @param transactionId the id of the transaction that executes this action.
* @param uuid the uuid of the node to delete.
*/
- DeleteNode(long transactionId, String uuid) {
+ DeleteNode(long transactionId, UUID uuid) {
super(transactionId, Action.TYPE_DELETE_NODE);
this.uuid = uuid;
}
@@ -1567,7 +1615,7 @@
if (arguments.length() != Constants.UUID_FORMATTED_LENGTH) {
throw new IllegalArgumentException("arguments is not a uuid");
}
- return new DeleteNode(transactionId, arguments);
+ return new DeleteNode(transactionId, UUID.fromString(arguments));
}
/**
@@ -1576,7 +1624,7 @@
* @inheritDoc
*/
public void execute(MultiIndex index) throws IOException {
- Term idTerm = new Term(FieldNames.UUID, uuid);
+ Term idTerm = new Term(FieldNames.UUID, uuid.toString());
// if the document cannot be deleted from the volatile index
// delete it from one of the persistent indexes.
int num = index.volatileIndex.removeDocument(idTerm);
Modified: jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?rev=439019&r1=439018&r2=439019&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Thu Aug 31 12:22:45 2006
@@ -35,7 +35,6 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.Hits;
@@ -290,8 +289,7 @@
checkOpen();
index.update(new AbstractIteratorDecorator(remove) {
public Object next() {
- NodeId id = (NodeId) super.next();
- return new Term(FieldNames.UUID, id.getUUID().toString());
+ return ((NodeId) super.next()).getUUID();
}
}, new AbstractIteratorDecorator(add) {
public Object next() {