You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2012/07/19 15:11:09 UTC

svn commit: r1363321 - in /jackrabbit/oak/trunk/oak-core: ./ src/main/java/org/apache/jackrabbit/oak/plugins/lucene/

Author: alexparvulescu
Date: Thu Jul 19 13:11:09 2012
New Revision: 1363321

URL: http://svn.apache.org/viewvc?rev=1363321&view=rev
Log:
OAK-154 Full text search index
 - bumped lucene up to 4.0.0 alpha, fixed compilation issues
 - added factory classes & minor cosmetics

Added:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java   (with props)
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-core/pom.xml
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java

Modified: jackrabbit/oak/trunk/oak-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/pom.xml?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-core/pom.xml Thu Jul 19 13:11:09 2012
@@ -162,10 +162,16 @@
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
-      <version>3.6.0</version>
+      <version>4.0.0-ALPHA</version>
       <optional>true</optional>
     </dependency>
     <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-analyzers-common</artifactId>
+        <version>4.0.0-ALPHA</version>
+        <optional>true</optional>
+    </dependency>
+    <dependency>
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-parsers</artifactId>
       <version>1.1</version>

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java?rev=1363321&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java Thu Jul 19 13:11:09 2012
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH;
+import static org.apache.lucene.document.Field.Store.*;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+
+/**
+ * Static factory for the Lucene {@link Field} objects that make up an index
+ * document: the path field and the per-property value fields.
+ */
+public final class FieldFactory {
+
+    /**
+     * Not instantiable; this class only hosts static factory methods.
+     */
+    private FieldFactory() {
+    }
+
+    /**
+     * Builds the field carrying a node's path.
+     * <p>
+     * The field is named {@link FieldNames#PATH} and is created as a stored
+     * {@link StringField}, so the path can be read back from a document.
+     *
+     * @param path
+     *            the path to put into the field.
+     * @return a new path field.
+     */
+    public static Field newPathField(String path) {
+        Field pathField = new StringField(PATH, path, YES);
+        return pathField;
+    }
+
+    /**
+     * Builds the field for one value of a property.
+     * <p>
+     * The field is created as a {@link StringField} that is not stored.
+     *
+     * @param name
+     *            the name of the field.
+     * @param value
+     *            the string value to index under that name.
+     * @return a new property field.
+     */
+    public static Field newPropertyField(String name, String value) {
+        // TODO do we need norms info on the indexed fields ? TextField:StringField
+        // return new TextField(name, value, NO);
+        Field propertyField = new StringField(name, value, NO);
+        return propertyField;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java?rev=1363321&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java Thu Jul 19 13:11:09 2012
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Defines field names that are used internally to store :path, etc in the
+ * search index.
+ */
+public final class FieldNames {
+
+    /**
+     * Private constructor.
+     */
+    private FieldNames() {
+    }
+
+    /**
+     * Name of the field that contains the path of the node ({@value}).
+     */
+    public static final String PATH = ":path";
+
+    /**
+     * Used to select only the {@link #PATH} field from the lucene documents.
+     * <p>
+     * The set is unmodifiable: it is a shared constant, so an attempt to add
+     * or remove names throws {@link UnsupportedOperationException} instead of
+     * silently changing the selection for every other user of the constant.
+     */
+    public static final Set<String> PATH_SELECTOR = Collections
+            .unmodifiableSet(new HashSet<String>(Arrays.asList(PATH)));
+
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/FieldNames.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneEditor.java Thu Jul 19 13:11:09 2012
@@ -16,6 +16,10 @@
  */
 package org.apache.jackrabbit.oak.plugins.lucene;
 
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldFactory.newPathField;
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldFactory.newPropertyField;
+import static org.apache.jackrabbit.oak.plugins.lucene.TermFactory.newPathTerm;
+
 import java.io.IOException;
 
 import javax.jcr.PropertyType;
@@ -31,12 +35,8 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.util.Version;
 import org.apache.tika.Tika;
 import org.apache.tika.exception.TikaException;
@@ -45,7 +45,7 @@ public class LuceneEditor implements Com
 
     private static final Tika TIKA = new Tika();
 
-    private static final Version VERSION = Version.LUCENE_36;
+    private static final Version VERSION = Version.LUCENE_40;
 
     private static final Analyzer ANALYZER = new StandardAnalyzer(VERSION);
 
@@ -101,7 +101,7 @@ public class LuceneEditor implements Com
             }
             if (modified) {
                 writer.updateDocument(
-                        makePathTerm(path),
+                        newPathTerm(path),
                         makeDocument(path, state));
             }
         }
@@ -167,31 +167,26 @@ public class LuceneEditor implements Com
 
         private void deleteSubtree(String path, NodeState state)
                 throws IOException {
-            writer.deleteDocuments(makePathTerm(path));
+            writer.deleteDocuments(newPathTerm(path));
             for (ChildNodeEntry entry : state.getChildNodeEntries()) {
                 deleteSubtree(path + "/" + entry.getName(), entry.getNodeState());
             }
         }
 
-        private Term makePathTerm(String path) {
-            return new Term(":path", path);
-        }
-
         private Document makeDocument(
                 String path, NodeState state) {
             Document document = new Document();
-            document.add(new Field(
-                    ":path", path, Store.YES, Index.NOT_ANALYZED));
+            document.add(newPathField(path));
             for (PropertyState property : state.getProperties()) {
                 String pname = property.getName();
                 for (CoreValue value : property.getValues()) {
-                    document.add(makeField(pname, value));
+                    document.add(newPropertyField(pname, parseStringValue(value)));
                 }
             }
             return document;
         }
 
-        private Field makeField(String name, CoreValue value) {
+        private String parseStringValue(CoreValue value) {
             String string;
             if (value.getType() != PropertyType.BINARY) {
                 string = value.getString();
@@ -204,7 +199,7 @@ public class LuceneEditor implements Com
                     string = "";
                 }
             }
-            return new Field(name, string, Store.NO, Index.ANALYZED);
+            return string;
         }
 
     }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/LuceneIndex.java Thu Jul 19 13:11:09 2012
@@ -16,6 +16,10 @@
  */
 package org.apache.jackrabbit.oak.plugins.lucene;
 
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH;
+import static org.apache.jackrabbit.oak.plugins.lucene.FieldNames.PATH_SELECTOR;
+import static org.apache.jackrabbit.oak.plugins.lucene.TermFactory.newPathTerm;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -28,6 +32,7 @@ import org.apache.jackrabbit.oak.spi.Fil
 import org.apache.jackrabbit.oak.spi.Filter.PropertyRestriction;
 import org.apache.jackrabbit.oak.spi.QueryIndex;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -70,32 +75,29 @@ public class LuceneIndex implements Quer
     @Override
     public Cursor query(Filter filter, String revisionId) {
         try {
-            Directory directory =
-                    new OakDirectory(store, store.getRoot(), path);
+            Directory directory = new OakDirectory(store, store.getRoot(), path);
             try {
-                IndexReader reader = IndexReader.open(directory);
+                IndexReader reader = DirectoryReader.open(directory);
                 try {
                     IndexSearcher searcher = new IndexSearcher(reader);
-                    try {
-                        Collection<String> paths = new ArrayList<String>();
+                    Collection<String> paths = new ArrayList<String>();
 
-                        Query query = getQuery(filter);
-                        if (query != null) {
-                            TopDocs docs = searcher.search(query, Integer.MAX_VALUE);
-                            for (ScoreDoc doc : docs.scoreDocs) {
-                                String path = reader.document(doc.doc).get(":path");
-                                if ("".equals(path)) {
-                                    paths.add("/");
-                                } else if (path != null) {
-                                    paths.add(path);
-                                }
+                    Query query = getQuery(filter);
+                    if (query != null) {
+                        TopDocs docs = searcher
+                                .search(query, Integer.MAX_VALUE);
+                        for (ScoreDoc doc : docs.scoreDocs) {
+                            String path = reader.document(doc.doc,
+                                    PATH_SELECTOR).get(PATH);
+                            if ("".equals(path)) {
+                                paths.add("/");
+                            } else if (path != null) {
+                                paths.add(path);
                             }
                         }
-
-                        return new PathCursor(paths);
-                    } finally {
-                        searcher.close();
                     }
+
+                    return new PathCursor(paths);
                 } finally {
                     reader.close();
                 }
@@ -103,7 +105,7 @@ public class LuceneIndex implements Quer
                 directory.close();
             }
         } catch (IOException e) {
-            return new PathCursor(Collections.<String>emptySet());
+            return new PathCursor(Collections.<String> emptySet());
         }
     }
 
@@ -116,19 +118,19 @@ public class LuceneIndex implements Quer
         }
         switch (filter.getPathRestriction()) {
         case ALL_CHILDREN:
-            qs.add(new PrefixQuery(new Term(":path", path + "/")));
+            qs.add(new PrefixQuery(newPathTerm(path + "/")));
             break;
         case DIRECT_CHILDREN:
-            qs.add(new PrefixQuery(new Term(":path", path + "/"))); // FIXME
+            qs.add(new PrefixQuery(newPathTerm(path + "/"))); // FIXME
             break;
         case EXACT:
-            qs.add(new TermQuery(new Term(":path", path)));
+            qs.add(new TermQuery(newPathTerm(path)));
             break;
         case PARENT:
             int slash = path.lastIndexOf('/');
             if (slash != -1) {
                 String parent = path.substring(0, slash);
-                qs.add(new TermQuery(new Term(":path", parent)));
+                qs.add(new TermQuery(newPathTerm(parent)));
             } else {
                 return null; // there's no parent of the root node
             }
@@ -142,8 +144,8 @@ public class LuceneIndex implements Quer
             if (first .equals(last) && pr.firstIncluding && pr.lastIncluding) {
                 qs.add(new TermQuery(new Term(name, first)));
             } else {
-                qs.add(new TermRangeQuery(
-                        name, first, last, pr.firstIncluding, pr.lastIncluding));
+                qs.add(TermRangeQuery.newStringRange(name, first, last,
+                        pr.firstIncluding, pr.lastIncluding));
             }
         }
 

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java?rev=1363321&r1=1363320&r2=1363321&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/OakDirectory.java Thu Jul 19 13:11:09 2012
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import javax.annotation.Nonnull;
@@ -32,6 +33,7 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.spi.state.NodeStateBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.NoLockFactory;
@@ -60,7 +62,7 @@ class OakDirectory extends Directory {
     }
 
     @Nonnull
-    public NodeState getRoot() {
+    NodeState getRoot() {
         return rootBuilder.getNodeState();
     }
 
@@ -89,30 +91,6 @@ class OakDirectory extends Directory {
     }
 
     @Override
-    public long fileModified(String name) throws IOException {
-        NodeState file = getDirectory().getChildNode(name);
-        if (file == null) {
-            return 0;
-        }
-
-        PropertyState property = file.getProperty("jcr:lastModified");
-        if (property == null || property.isArray()) {
-            return 0;
-        }
-
-        return property.getValue().getLong();
-    }
-
-    @Override
-    public void touchFile(String name) throws IOException {
-        NodeStateBuilder builder = directoryBuilder.getChildBuilder(name);
-        builder.setProperty(
-                "jcr:lastModified",
-                factory.createValue(System.currentTimeMillis()));
-        directory = null;
-    }
-
-    @Override
     public void deleteFile(String name) throws IOException {
         directoryBuilder.removeNode(name);
         directory = null;
@@ -132,65 +110,23 @@ class OakDirectory extends Directory {
 
         return property.getValue().length();
     }
+    
 
     @Override
-    public IndexOutput createOutput(String name) throws IOException {
+    public IndexOutput createOutput(String name, IOContext context)
+            throws IOException {
         return new OakIndexOutput(name);
     }
 
     @Override
-    public IndexInput openInput(final String name) throws IOException {
-        return new IndexInput(name) {
-
-            private final byte[] data = readFile(name);
-
-            private int position;
-
-            @Override
-            public void readBytes(byte[] b, int offset, int len)
-                    throws IOException {
-                if (len < 0 || position + len > data.length) {
-                    throw new IOException("Invalid byte range request");
-                } else {
-                    System.arraycopy(data, position, b, offset, len);
-                    position += len;
-                }
-            }
-
-            @Override
-            public byte readByte() throws IOException {
-                if (position >= data.length) {
-                    throw new IOException("Invalid byte range request");
-                } else {
-                    return data[position++];
-                }
-            }
-
-            @Override
-            public void seek(long pos) throws IOException {
-                if (pos < 0 || pos >= data.length) {
-                    throw new IOException("Invalid seek request");
-                } else {
-                    position = (int) pos;
-                }
-            }
-
-            @Override
-            public long length() {
-                return data.length;
-            }
-
-            @Override
-            public long getFilePointer() {
-                return position;
-            }
-
-            @Override
-            public void close() {
-                // do nothing
-            }
+    public IndexInput openInput(String name, IOContext context)
+            throws IOException {
+        return new OakIndexInput(name);
+    }
 
-        };
+    @Override
+    public void sync(Collection<String> names) throws IOException {
+        // NOTE(review): no-op placeholder, nothing is done for the given file names -- TODO confirm that is sufficient
     }
 
     @Override
@@ -314,5 +250,102 @@ class OakDirectory extends Directory {
         }
     }
 
+    /**
+     * {@link IndexInput} that serves one index file from memory. The complete
+     * file content is fetched through <code>readFile</code> when the input is
+     * created; all reads and seeks then work on that byte array.
+     */
+    private final class OakIndexInput extends IndexInput {
+
+        /** Content of the file, as returned by <code>readFile</code>. */
+        private final byte[] data;
+
+        /** Offset into {@link #data} of the next byte to read. */
+        private int position;
+
+        /**
+         * Loads the named file and positions the input at its first byte.
+         *
+         * @param name
+         *            the name of the file to read.
+         * @throws IOException
+         *             if thrown while reading the file content.
+         */
+        public OakIndexInput(String name) throws IOException {
+            super(name);
+            this.data = readFile(name);
+            this.position = 0;
+        }
+
+        /**
+         * Copies the next <code>len</code> bytes into <code>b</code>,
+         * starting at <code>offset</code>, and advances the position.
+         *
+         * @throws IOException
+         *             if <code>len</code> is negative or larger than the
+         *             number of bytes left in the file.
+         */
+        @Override
+        public void readBytes(byte[] b, int offset, int len)
+                throws IOException {
+            // Compare against the remaining byte count: "position + len" can
+            // overflow for a huge len and would then slip past the check.
+            if (len < 0 || len > data.length - position) {
+                throw new IOException("Invalid byte range request");
+            }
+            System.arraycopy(data, position, b, offset, len);
+            position += len;
+        }
+
+        /**
+         * Returns the byte at the current position and advances by one.
+         *
+         * @throws IOException
+         *             if the position is already at the end of the file.
+         */
+        @Override
+        public byte readByte() throws IOException {
+            if (position >= data.length) {
+                throw new IOException("Invalid byte range request");
+            }
+            return data[position++];
+        }
+
+        /**
+         * Moves the read position to <code>pos</code>.
+         * <p>
+         * The end of the file (<code>pos == length()</code>) is a valid
+         * target; it is the only valid position of an empty file. Reading
+         * from there fails in {@link #readByte()} / {@link #readBytes}.
+         *
+         * @throws IOException
+         *             if <code>pos</code> is negative or beyond the end of
+         *             the file.
+         */
+        @Override
+        public void seek(long pos) throws IOException {
+            if (pos < 0 || pos > data.length) {
+                throw new IOException("Invalid seek request");
+            }
+            // Narrowing is safe: pos was just bounded by an int length.
+            position = (int) pos;
+        }
+
+        @Override
+        public long length() {
+            return data.length;
+        }
+
+        @Override
+        public long getFilePointer() {
+            return position;
+        }
+
+        @Override
+        public void close() {
+            // nothing to release, the data is a plain byte array
+        }
+
+    }
 
 }

Added: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java?rev=1363321&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java Thu Jul 19 13:11:09 2012
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.lucene;
+
+import org.apache.lucene.index.Term;
+
+/**
+ * Static factory for Lucene {@link Term} instances on the field names this
+ * index uses most often.
+ */
+public final class TermFactory {
+
+    /**
+     * Not instantiable; this class only hosts static factory methods.
+     */
+    private TermFactory() {
+    }
+
+    /**
+     * Builds the term that addresses the {@link FieldNames#PATH} field with
+     * the given <code>path</code> as its text.
+     *
+     * @param path
+     *            the path to use as the term text.
+     * @return a new term on the path field.
+     */
+    public static Term newPathTerm(String path) {
+        final String field = FieldNames.PATH;
+        return new Term(field, path);
+    }
+
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/lucene/TermFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain