You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2012/07/19 15:11:09 UTC
svn commit: r1363321 - in /jackrabbit/oak/trunk/oak-core: ./
src/main/java/org/apache/jackrabbit/oak/plugins/lucene/
Author: alexparvulescu
Date: Thu Jul 19 13:11:09 2012
New Revision: 1363321
URL: http://svn.apache.org/viewvc?rev=1363321&view=rev
Log:
OAK-154 Full text search index
- bumped lucene up to 4.0.0 alpha, fixed compilation issues
- added factory classes & minor cosmetics
Added:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java (with props)
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java (with props)
Modified:
jackrabbit/oak/trunk/oak-core/pom.xml
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
Modified: jackrabbit/oak/trunk/oak-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/pom.xml?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-core/pom.xml Thu Jul 19 13:11:09 2012
@@ -162,10 +162,16 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
- <version>3.6.0</version>
+ <version>4.0.0-ALPHA</version>
<optional>true</optional>
</dependency>
<dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers-common</artifactId>
+ <version>4.0.0-ALPHA</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.1</version>
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java?rev=1363321&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java Thu Jul 19 13:11:09 2012
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH;
+import static org.apache.lucene.document.Field.Store.*;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+
+/**
+ * <code>FieldFactory</code> is a factory for <code>Field</code> instances with
+ * frequently used fields.
+ */
+public final class FieldFactory {
+
+ /**
+ * Private constructor.
+ */
+ private FieldFactory() {
+ }
+
+ public static Field newPathField(String path) {
+ return new StringField(PATH, path, YES);
+ }
+
+ public static Field newPropertyField(String name, String value) {
+ // TODO do we need norms info on the indexed fields ? TextField:StringField
+ // return new TextField(name, value, NO);
+ return new StringField(name, value, NO);
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java?rev=1363321&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java Thu Jul 19 13:11:09 2012
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Defines field names that are used internally to store :path, etc in the
+ * search index.
+ */
+public final class FieldNames {
+
+ /**
+ * Private constructor.
+ */
+ private FieldNames() {
+ }
+
+ /**
+ * Name of the field that contains the {@value} property of the node.
+ */
+ public static final String PATH = ":path";
+
+ /**
+ * Used to select only the PATH field from the lucene documents
+ */
+ public static final Set<String> PATH_SELECTOR = new HashSet<String>(
+ Arrays.asList(PATH));
+
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java Thu Jul 19 13:11:09 2012
@@ -16,6 +16,10 @@
*/
package org.apache.jackrabbit.oak.plugins.lucene;
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldFactory.newPathField;
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldFactory.newPropertyField;
+import static org.apache.jackrabbit.oak.plugins.lucene.TermFactory.newPathTerm;
+
import java.io.IOException;
import javax.jcr.PropertyType;
@@ -31,12 +35,8 @@ import org.apache.jackrabbit.oak.spi.sta
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
import org.apache.lucene.util.Version;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
@@ -45,7 +45,7 @@ public class LuceneEditor implements Com
private static final Tika TIKA = new Tika();
- private static final Version VERSION = Version.LUCENE_36;
+ private static final Version VERSION = Version.LUCENE_40;
private static final Analyzer ANALYZER = new StandardAnalyzer(VERSION);
@@ -101,7 +101,7 @@ public class LuceneEditor implements Com
}
if (modified) {
writer.updateDocument(
- makePathTerm(path),
+ newPathTerm(path),
makeDocument(path, state));
}
}
@@ -167,31 +167,26 @@ public class LuceneEditor implements Com
private void deleteSubtree(String path, NodeState state)
throws IOException {
- writer.deleteDocuments(makePathTerm(path));
+ writer.deleteDocuments(newPathTerm(path));
for (ChildNodeEntry entry : state.getChildNodeEntries()) {
deleteSubtree(path + "/" + entry.getName(), entry.getNodeState());
}
}
- private Term makePathTerm(String path) {
- return new Term(":path", path);
- }
-
private Document makeDocument(
String path, NodeState state) {
Document document = new Document();
- document.add(new Field(
- ":path", path, Store.YES, Index.NOT_ANALYZED));
+ document.add(newPathField(path));
for (PropertyState property : state.getProperties()) {
String pname = property.getName();
for (CoreValue value : property.getValues()) {
- document.add(makeField(pname, value));
+ document.add(newPropertyField(pname, parseStringValue(value)));
}
}
return document;
}
- private Field makeField(String name, CoreValue value) {
+ private String parseStringValue(CoreValue value) {
String string;
if (value.getType() != PropertyType.BINARY) {
string = value.getString();
@@ -204,7 +199,7 @@ public class LuceneEditor implements Com
string = "";
}
}
- return new Field(name, string, Store.NO, Index.ANALYZED);
+ return string;
}
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java Thu Jul 19 13:11:09 2012
@@ -16,6 +16,10 @@
*/
package org.apache.jackrabbit.oak.plugins.lucene;
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH;
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH_SELECTOR;
+import static org.apache.jackrabbit.oak.plugins.lucene.TermFactory.newPathTerm;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -28,6 +32,7 @@ import org.apache.jackrabbit.oak.spi.Fil
import org.apache.jackrabbit.oak.spi.Filter.PropertyRestriction;
import org.apache.jackrabbit.oak.spi.QueryIndex;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -70,32 +75,29 @@ public class LuceneIndex implements Quer
@Override
public Cursor query(Filter filter, String revisionId) {
try {
- Directory directory =
- new OakDirectory(store, store.getRoot(), path);
+ Directory directory = new OakDirectory(store, store.getRoot(), path);
try {
- IndexReader reader = IndexReader.open(directory);
+ IndexReader reader = DirectoryReader.open(directory);
try {
IndexSearcher searcher = new IndexSearcher(reader);
- try {
- Collection<String> paths = new ArrayList<String>();
+ Collection<String> paths = new ArrayList<String>();
- Query query = getQuery(filter);
- if (query != null) {
- TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
- for (ScoreDoc doc : docs.scoreDocs) {
- String path = reader.document(doc.doc).get(":path");
- if ("".equals(path)) {
- paths.add("/");
- } else if (path != null) {
- paths.add(path);
- }
+ Query query = getQuery(filter);
+ if (query != null) {
+ TopDocs docs = searcher
+ .search(query, Integer.MAX_VALUE);
+ for (ScoreDoc doc : docs.scoreDocs) {
+ String path = reader.document(doc.doc,
+ PATH_SELECTOR).get(PATH);
+ if ("".equals(path)) {
+ paths.add("/");
+ } else if (path != null) {
+ paths.add(path);
}
}
-
- return new PathCursor(paths);
- } finally {
- searcher.close();
}
+
+ return new PathCursor(paths);
} finally {
reader.close();
}
@@ -103,7 +105,7 @@ public class LuceneIndex implements Quer
directory.close();
}
} catch (IOException e) {
- return new PathCursor(Collections.<String>emptySet());
+ return new PathCursor(Collections.<String> emptySet());
}
}
@@ -116,19 +118,19 @@ public class LuceneIndex implements Quer
}
switch (filter.getPathRestriction()) {
case ALL_CHILDREN:
- qs.add(new PrefixQuery(new Term(":path", path + "/")));
+ qs.add(new PrefixQuery(newPathTerm(path + "/")));
break;
case DIRECT_CHILDREN:
- qs.add(new PrefixQuery(new Term(":path", path + "/"))); // FIXME
+ qs.add(new PrefixQuery(newPathTerm(path + "/"))); // FIXME
break;
case EXACT:
- qs.add(new TermQuery(new Term(":path", path)));
+ qs.add(new TermQuery(newPathTerm(path)));
break;
case PARENT:
int slash = path.lastIndexOf('/');
if (slash != -1) {
String parent = path.substring(0, slash);
- qs.add(new TermQuery(new Term(":path", parent)));
+ qs.add(new TermQuery(newPathTerm(parent)));
} else {
return null; // there's no parent of the root node
}
@@ -142,8 +144,8 @@ public class LuceneIndex implements Quer
if (first .equals(last) && pr.firstIncluding && pr.lastIncluding) {
qs.add(new TermQuery(new Term(name, first)));
} else {
- qs.add(new TermRangeQuery(
- name, first, last, pr.firstIncluding, pr.lastIncluding));
+ qs.add(TermRangeQuery.newStringRange(name, first, last,
+ pr.firstIncluding, pr.lastIncluding));
}
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java Thu Jul 19 13:11:09 2012
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import javax.annotation.Nonnull;
@@ -32,6 +33,7 @@ import org.apache.jackrabbit.oak.spi.sta
import org.apache.jackrabbit.oak.spi.state.NodeStateBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoLockFactory;
@@ -60,7 +62,7 @@ class OakDirectory extends Directory {
}
@Nonnull
- public NodeState getRoot() {
+ NodeState getRoot() {
return rootBuilder.getNodeState();
}
@@ -89,30 +91,6 @@ class OakDirectory extends Directory {
}
@Override
- public long fileModified(String name) throws IOException {
- NodeState file = getDirectory().getChildNode(name);
- if (file == null) {
- return 0;
- }
-
- PropertyState property = file.getProperty("jcr:lastModified");
- if (property == null || property.isArray()) {
- return 0;
- }
-
- return property.getValue().getLong();
- }
-
- @Override
- public void touchFile(String name) throws IOException {
- NodeStateBuilder builder = directoryBuilder.getChildBuilder(name);
- builder.setProperty(
- "jcr:lastModified",
- factory.createValue(System.currentTimeMillis()));
- directory = null;
- }
-
- @Override
public void deleteFile(String name) throws IOException {
directoryBuilder.removeNode(name);
directory = null;
@@ -132,65 +110,23 @@ class OakDirectory extends Directory {
return property.getValue().length();
}
+
@Override
- public IndexOutput createOutput(String name) throws IOException {
+ public IndexOutput createOutput(String name, IOContext context)
+ throws IOException {
return new OakIndexOutput(name);
}
@Override
- public IndexInput openInput(final String name) throws IOException {
- return new IndexInput(name) {
-
- private final byte[] data = readFile(name);
-
- private int position;
-
- @Override
- public void readBytes(byte[] b, int offset, int len)
- throws IOException {
- if (len < 0 || position + len > data.length) {
- throw new IOException("Invalid byte range request");
- } else {
- System.arraycopy(data, position, b, offset, len);
- position += len;
- }
- }
-
- @Override
- public byte readByte() throws IOException {
- if (position >= data.length) {
- throw new IOException("Invalid byte range request");
- } else {
- return data[position++];
- }
- }
-
- @Override
- public void seek(long pos) throws IOException {
- if (pos < 0 || pos >= data.length) {
- throw new IOException("Invalid seek request");
- } else {
- position = (int) pos;
- }
- }
-
- @Override
- public long length() {
- return data.length;
- }
-
- @Override
- public long getFilePointer() {
- return position;
- }
-
- @Override
- public void close() {
- // do nothing
- }
+ public IndexInput openInput(String name, IOContext context)
+ throws IOException {
+ return new OakIndexInput(name);
+ }
- };
+ @Override
+ public void sync(Collection<String> names) throws IOException {
+ // ?
}
@Override
@@ -314,5 +250,62 @@ class OakDirectory extends Directory {
}
}
+ /**
+  * {@link IndexInput} that serves one index file from memory. The complete
+  * content is fetched once, in the constructor, through
+  * <code>readFile(name)</code>; every read and seek afterwards works on that
+  * byte array.
+  */
+ private final class OakIndexInput extends IndexInput {
+
+     /** Complete content of the file, as returned by readFile(name). */
+     private final byte[] data;
+
+     /** Offset into {@link #data} of the next byte to read. */
+     private int position;
+
+     /**
+      * Opens the named file and positions the input at its start.
+      *
+      * @param name
+      *            the name of the file to read.
+      * @throws IOException
+      *             if the file content can not be read.
+      */
+     public OakIndexInput(String name) throws IOException {
+         super(name);
+         this.data = readFile(name);
+         this.position = 0;
+     }
+
+     /**
+      * Copies <code>len</code> bytes from the current position into
+      * <code>b</code>, starting at <code>offset</code>, and advances the
+      * position by <code>len</code>.
+      *
+      * @throws IOException
+      *             if <code>len</code> is negative or reaches past the end
+      *             of the file.
+      */
+     @Override
+     public void readBytes(byte[] b, int offset, int len)
+             throws IOException {
+         // Compare against the remaining byte count instead of computing
+         // position + len, which could overflow int for a very large len.
+         if (len < 0 || len > data.length - position) {
+             throw new IOException("Invalid byte range request");
+         }
+         System.arraycopy(data, position, b, offset, len);
+         position += len;
+     }
+
+     /**
+      * Returns the byte at the current position and advances by one.
+      *
+      * @throws IOException
+      *             if the position is at the end of the file.
+      */
+     @Override
+     public byte readByte() throws IOException {
+         if (position >= data.length) {
+             throw new IOException("Invalid byte range request");
+         }
+         return data[position++];
+     }
+
+     /**
+      * Moves the read position to <code>pos</code>. Any position from 0 up
+      * to and including {@link #length()} is accepted, so that seeking to
+      * the end of the file (or to 0 in an empty file) is legal; a read at
+      * that position then fails.
+      *
+      * @throws IOException
+      *             if <code>pos</code> is negative or beyond the end of the
+      *             file.
+      */
+     @Override
+     public void seek(long pos) throws IOException {
+         if (pos < 0 || pos > data.length) {
+             throw new IOException("Invalid seek request");
+         }
+         // Safe narrowing: pos is within [0, data.length] at this point.
+         position = (int) pos;
+     }
+
+     /** Returns the total number of bytes in the file. */
+     @Override
+     public long length() {
+         return data.length;
+     }
+
+     /** Returns the offset of the next byte to read. */
+     @Override
+     public long getFilePointer() {
+         return position;
+     }
+
+     /** Nothing to release: the content is a plain byte array. */
+     @Override
+     public void close() {
+         // do nothing
+     }
+
+ }
}
Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java?rev=1363321&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java Thu Jul 19 13:11:09 2012
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import org.apache.lucene.index.Term;
+
+/**
+ * Static factory for lucene <code>Term</code> instances on the field names
+ * that the index uses most often.
+ */
+public final class TermFactory {
+
+    /**
+     * Not instantiable; this class only offers static factory methods.
+     */
+    private TermFactory() {
+    }
+
+    /**
+     * Builds the term that matches a node by its path: the field name is
+     * {@link FieldNames#PATH} and the term text is the given
+     * <code>path</code>.
+     *
+     * @param path
+     *            the path.
+     * @return the path term.
+     */
+    public static Term newPathTerm(String path) {
+        final String field = FieldNames.PATH;
+        return new Term(field, path);
+    }
+
+}
Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain