You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2013/06/14 15:13:48 UTC
svn commit: r1493056 - in
/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene:
LuceneIndexEditor.java LuceneIndexEditorContext.java
Author: alexparvulescu
Date: Fri Jun 14 13:13:48 2013
New Revision: 1493056
URL: http://svn.apache.org/r1493056
Log:
OAK-860 Streamline the IndexEditor
- added a context object for the lucene index which also keeps track of the number of indexed nodes
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java (with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1493056&r1=1493055&r2=1493056&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Fri Jun 14 13:13:48 2013
@@ -22,20 +22,10 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newFulltextField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ANALYZER;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
-import static org.apache.lucene.store.NoLockFactory.getNoLockFactory;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.util.concurrent.atomic.AtomicLong;
-
-import javax.jcr.PropertyType;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.api.Blob;
@@ -45,21 +35,14 @@ import org.apache.jackrabbit.oak.api.Typ
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.IndexEditor;
import org.apache.jackrabbit.oak.plugins.index.lucene.aggregation.AggregatedState;
-import org.apache.jackrabbit.oak.plugins.index.lucene.aggregation.NodeAggregator;
import org.apache.jackrabbit.oak.spi.commit.Editor;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.search.PrefixQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.WriteOutContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -75,102 +58,31 @@ public class LuceneIndexEditor implement
private static final Logger log =
LoggerFactory.getLogger(LuceneIndexEditor.class);
- private static final IndexWriterConfig config = getIndexWriterConfig();
-
- private static final NodeAggregator aggregator = new NodeAggregator();
-
- private static final Parser parser = new AutoDetectParser();
-
- private AtomicLong indexedNodes;
-
- private static IndexWriterConfig getIndexWriterConfig() {
- // FIXME: Hack needed to make Lucene work in an OSGi environment
- Thread thread = Thread.currentThread();
- ClassLoader loader = thread.getContextClassLoader();
- thread.setContextClassLoader(IndexWriterConfig.class.getClassLoader());
- try {
- IndexWriterConfig config = new IndexWriterConfig(VERSION, ANALYZER);
- config.setMergeScheduler(new SerialMergeScheduler());
- return config;
- } finally {
- thread.setContextClassLoader(loader);
- }
- }
-
- private static Directory newIndexDirectory(NodeBuilder definition)
- throws CommitFailedException {
- String path = getString(definition, PERSISTENCE_PATH);
- if (path == null) {
- return new ReadWriteOakDirectory(
- definition.child(INDEX_DATA_CHILD_NAME));
- } else {
- try {
- File file = new File(path);
- file.mkdirs();
- // TODO: close() is never called
- // TODO: no locking used
- // --> using the FS backend for the index is in any case
- // troublesome in clustering scenarios and for backup
- // etc. so instead of fixing these issues we'd better
- // work on making the in-content index work without
- // problems (or look at the Solr indexer as alternative)
- return FSDirectory.open(file, getNoLockFactory());
- } catch (IOException e) {
- throw new CommitFailedException(
- "Lucene", 1, "Failed to open the index in " + path, e);
- }
- }
- }
-
- /** Parent editor, or {@code null} if this is the root editor. */
- private final LuceneIndexEditor parent;
+ private final LuceneIndexEditorContext context;
/** Name of this node, or {@code null} for the root node. */
private final String name;
+ /** Parent editor or {@code null} if this is the root editor. */
+ private final LuceneIndexEditor parent;
+
/** Path of this editor, built lazily in {@link #getPath()}. */
private String path;
- /** Index definition node builder */
- private final NodeBuilder definition;
-
- private final int propertyTypes;
-
- private IndexWriter writer = null;
-
private boolean propertiesChanged = false;
LuceneIndexEditor(NodeBuilder definition) throws CommitFailedException {
this.parent = null;
this.name = null;
this.path = "/";
- this.definition = definition;
-
- PropertyState ps = definition.getProperty(INCLUDE_PROPERTY_TYPES);
- if (ps != null) {
- int types = 0;
- for (String inc : ps.getValue(Type.STRINGS)) {
- try {
- types |= 1 << PropertyType.valueFromName(inc);
- } catch (IllegalArgumentException e) {
- log.warn("Unknown property type: " + inc);
- }
- }
- this.propertyTypes = types;
- } else {
- this.propertyTypes = -1;
- }
- this.indexedNodes = new AtomicLong(0);
+ this.context = new LuceneIndexEditorContext(definition);
}
private LuceneIndexEditor(LuceneIndexEditor parent, String name) {
this.parent = parent;
this.name = name;
this.path = null;
- this.definition = parent.definition;
- this.writer = parent.writer;
- this.propertyTypes = parent.propertyTypes;
- this.indexedNodes = parent.indexedNodes;
+ this.context = parent.context;
}
public String getPath() {
@@ -183,14 +95,6 @@ public class LuceneIndexEditor implement
@Override
public void enter(NodeState before, NodeState after)
throws CommitFailedException {
- if (parent == null) {
- try {
- writer = new IndexWriter(newIndexDirectory(definition), config);
- } catch (IOException e) {
- throw new CommitFailedException(
- "Lucene", 2, "Unable to create a new index writer", e);
- }
- }
}
@Override
@@ -199,13 +103,13 @@ public class LuceneIndexEditor implement
if (propertiesChanged || !before.exists()) {
String path = getPath();
try {
- writer.updateDocument(
- newPathTerm(path), makeDocument(path, after));
+ context.getWriter().updateDocument(newPathTerm(path),
+ makeDocument(path, after));
} catch (IOException e) {
throw new CommitFailedException(
"Lucene", 3, "Failed to index the node " + path, e);
}
- long indexed = indexedNodes.incrementAndGet();
+ long indexed = context.incIndexedNodes();
if (indexed % 1000 == 0) {
log.debug("Indexed {} nodes...", indexed);
}
@@ -213,14 +117,13 @@ public class LuceneIndexEditor implement
if (parent == null) {
try {
- writer.close();
+ context.closeWriter();
} catch (IOException e) {
throw new CommitFailedException("Lucene", 4,
"Failed to close the Lucene index", e);
}
- long indexed = indexedNodes.get();
- if (indexed > 0) {
- log.debug("Indexed {} nodes, done.", indexed);
+ if (context.getIndexedNodes() > 0) {
+ log.debug("Indexed {} nodes, done.", context.getIndexedNodes());
}
}
}
@@ -257,6 +160,7 @@ public class LuceneIndexEditor implement
String path = PathUtils.concat(getPath(), name);
try {
+ IndexWriter writer = context.getWriter();
// Remove all index entries in the removed subtree
writer.deleteDocuments(newPathTerm(path));
writer.deleteDocuments(new PrefixQuery(newPathTerm(path + "/")));
@@ -276,7 +180,7 @@ public class LuceneIndexEditor implement
for (PropertyState property : state.getProperties()) {
String pname = property.getName();
if (isVisible(pname)
- && (propertyTypes & (1 << property.getType().tag())) != 0) {
+ && (context.getPropertyTypes() & (1 << property.getType().tag())) != 0) {
if (Type.BINARY.tag() == property.getType().tag()) {
addBinaryValue(document, property, state);
} else {
@@ -288,11 +192,11 @@ public class LuceneIndexEditor implement
}
}
- for (AggregatedState agg : aggregator.getAggregates(state)) {
+ for (AggregatedState agg : context.getAggregator().getAggregates(state)) {
for (PropertyState property : agg.getProperties()) {
String pname = property.getName();
if (isVisible(pname)
- && (propertyTypes & (1 << property.getType().tag())) != 0) {
+ && (context.getPropertyTypes() & (1 << property.getType().tag())) != 0) {
if (Type.BINARY.tag() == property.getType().tag()) {
addBinaryValue(document, property, agg.get());
} else {
@@ -335,7 +239,7 @@ public class LuceneIndexEditor implement
try {
InputStream stream = v.getNewStream();
try {
- parser.parse(stream, handler, metadata, new ParseContext());
+ context.getParser().parse(stream, handler, metadata, new ParseContext());
} finally {
stream.close();
}
Added: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1493056&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java (added)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java Fri Jun 14 13:13:48 2013
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene;
+
+import static org.apache.jackrabbit.oak.plugins.index.IndexUtils.getString;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ANALYZER;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
+import static org.apache.lucene.store.NoLockFactory.getNoLockFactory;
+
+import java.io.File;
+import java.io.IOException;
+
+import javax.jcr.PropertyType;
+
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.lucene.aggregation.NodeAggregator;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.SerialMergeScheduler;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class LuceneIndexEditorContext {
+
+ private static final Logger log = LoggerFactory
+ .getLogger(LuceneIndexEditorContext.class);
+
+ private static IndexWriterConfig getIndexWriterConfig() {
+ // FIXME: Hack needed to make Lucene work in an OSGi environment
+ Thread thread = Thread.currentThread();
+ ClassLoader loader = thread.getContextClassLoader();
+ thread.setContextClassLoader(IndexWriterConfig.class.getClassLoader());
+ try {
+ IndexWriterConfig config = new IndexWriterConfig(VERSION, ANALYZER);
+ config.setMergeScheduler(new SerialMergeScheduler());
+ return config;
+ } finally {
+ thread.setContextClassLoader(loader);
+ }
+ }
+
+ private static Directory newIndexDirectory(NodeBuilder definition)
+ throws IOException {
+ String path = getString(definition, PERSISTENCE_PATH);
+ if (path == null) {
+ return new ReadWriteOakDirectory(
+ definition.child(INDEX_DATA_CHILD_NAME));
+ } else {
+ // try {
+ File file = new File(path);
+ file.mkdirs();
+ // TODO: close() is never called
+ // TODO: no locking used
+ // --> using the FS backend for the index is in any case
+ // troublesome in clustering scenarios and for backup
+ // etc. so instead of fixing these issues we'd better
+ // work on making the in-content index work without
+ // problems (or look at the Solr indexer as alternative)
+ return FSDirectory.open(file, getNoLockFactory());
+ // } catch (IOException e) {
+ // throw new CommitFailedException("Lucene", 1,
+ // "Failed to open the index in " + path, e);
+ // }
+ }
+ }
+
+ private static final NodeAggregator aggregator = new NodeAggregator();
+
+ private static final IndexWriterConfig config = getIndexWriterConfig();
+
+ private static final Parser parser = new AutoDetectParser();
+
+ private final NodeBuilder definition;
+
+ private IndexWriter writer = null;
+
+ private final int propertyTypes;
+
+ private long indexedNodes;
+
+ LuceneIndexEditorContext(NodeBuilder definition) {
+ this.definition = definition;
+
+ PropertyState ps = definition.getProperty(INCLUDE_PROPERTY_TYPES);
+ if (ps != null) {
+ int types = 0;
+ for (String inc : ps.getValue(Type.STRINGS)) {
+ try {
+ types |= 1 << PropertyType.valueFromName(inc);
+ } catch (IllegalArgumentException e) {
+ log.warn("Unknown property type: " + inc);
+ }
+ }
+ this.propertyTypes = types;
+ } else {
+ this.propertyTypes = -1;
+ }
+ this.indexedNodes = 0;
+ }
+
+ int getPropertyTypes() {
+ return propertyTypes;
+ }
+
+ NodeAggregator getAggregator() {
+ return aggregator;
+ }
+
+ Parser getParser() {
+ return parser;
+ }
+
+ IndexWriter getWriter() throws IOException {
+ if (writer == null) {
+ writer = new IndexWriter(newIndexDirectory(definition), config);
+ }
+ return writer;
+ }
+
+ /**
+ * close writer if it's not null
+ */
+ void closeWriter() throws IOException {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+
+ public long incIndexedNodes() {
+ indexedNodes++;
+ return indexedNodes;
+ }
+
+ public long getIndexedNodes() {
+ return indexedNodes;
+ }
+
+}
Propchange: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
------------------------------------------------------------------------------
svn:mime-type = text/plain