You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2009/04/22 16:49:49 UTC

svn commit: r767551 - in /jackrabbit/branches/1.5: ./ jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ jackrabbit-core/src/test/java/org/apache/jackrabbit/core/ jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/ jackrabbit-core/src/test/repository/workspaces/indexing-test/

Author: jukka
Date: Wed Apr 22 14:49:48 2009
New Revision: 767551

URL: http://svn.apache.org/viewvc?rev=767551&view=rev
Log:
1.5: Merged revision 760906 (JCR-2035)

Added:
    jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
      - copied unchanged from r760906, jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java
Modified:
    jackrabbit/branches/1.5/   (props changed)
    jackrabbit/branches/1.5/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
    jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java
    jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java
    jackrabbit/branches/1.5/jackrabbit-core/src/test/repository/workspaces/indexing-test/workspace.xml

Propchange: jackrabbit/branches/1.5/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Apr 22 14:49:48 2009
@@ -1,2 +1,2 @@
 /jackrabbit/branches/1.3:631261
-/jackrabbit/trunk:703899-704158,704165,704167,704324,704358,704361,704864,704933,704939,705010,705033,705243,705496,705522,705579,705925,705932,705934,705937-705938,705961,706242,706273,706285-706286,706562,706606,706649,706655,706660,706697,706918,707303-707304,707307,707310,707630,708206,708598,708609,708613,708619,708634,708840,708863,708909,708929,708943,709115,709142,709207,709211,710047,711238,711566-711567,711595,711841-711843,712984-712985,713037,713059,713065,713072,713076,713162,713214,713956,713958,713964,713971,713975,714034,718218,718249-718250,718371,718376,718566,718632,718981,719225,719282,719575-719577,719579,719585-719586,719588,719592,720455,720484,720492,720524,720533,720540,720673,720679,720687,720784,720940,720969,721186,721191,721194,721235,721387-721389,721470,721495,722068-722069,722463,722465,722467,722470,722825,723281,723346,723728,723784,724300,724387,725292,727376,727388,727390,727395,727397,727402,727492,727701,728022,731896,731934,731941,73234
 7,732678,732686,732689,732693,732703,732715-732716,732719,732728,732730,732734-732738,732740,732742-732743,732745,732867,732883,733057-733059,733061,733080,734092,734366,734375,734400,734709,735401,736021,736030,736274,736276,736650-736651,736653-736656,736658,736680,737695,738087,738119,738121,738419,738422,738474,738512,739210,739212,739226,740262,740734,740736-740738,740747,740749-740750,741052,741100,741121,741206,741208-741210,741213,741509,741524,741652,741803,742382,742538,743295,743713,743718,743726,743734,743738,744883-744884,744889,744895,744911,744935,744940,744954,744956,745041,745051,745053,745056,745060,745120,745500,745534,745824,745849,746301-746302,746486,746602-746603,746609,746666,746747-746748,746932,746946,747096,747325,747347,747358-747360,747362,747365,747368,747372,747785,747839,748065-748066,748232-748233,748243,748247,748486,749237,749448,749622,749953,749965,750008,750011,750437,750536,752036-752039,752044,752046-752051,752053-752054,752056,752058-
 752060,752063-752067,752115,752131,752414-752415,752457-752458,752478,752480,752543-752545,752640,752809,752831,752839-752841,753225-753228,753232,753244,753328,755512,755582,756378,756403,756405,756409,756429,756432,756442,756444-756445,757364,757698,757775-757776,757814,757854,757856,757862,758003,758193,758263,758265,758349,758354,758629,758632,758634-758636,758639-758642,758646,758649,758653-758654,758667,759880,759889,760386,760479,760876,760900,760945,761267,761279-761280,761282-761283,761292,761634,761645,761690-761691,761715,762671,762675,762700,762702,762731,762737,762755,762780-762781,762789,762793,762797,762802,762804,762808,762813-762814,762817-762818,762821-762823,763146,763160,763188,763205,763215,763242,763244,763248,763617,765322,765328,765337,765532,765551,765554,765556,765585
+/jackrabbit/trunk:703899-704158,704165,704167,704324,704358,704361,704864,704933,704939,705010,705033,705243,705496,705522,705579,705925,705932,705934,705937-705938,705961,706242,706273,706285-706286,706562,706606,706649,706655,706660,706697,706918,707303-707304,707307,707310,707630,708206,708598,708609,708613,708619,708634,708840,708863,708909,708929,708943,709115,709142,709207,709211,710047,711238,711566-711567,711595,711841-711843,712984-712985,713037,713059,713065,713072,713076,713162,713214,713956,713958,713964,713971,713975,714034,718218,718249-718250,718371,718376,718566,718632,718981,719225,719282,719575-719577,719579,719585-719586,719588,719592,720455,720484,720492,720524,720533,720540,720673,720679,720687,720784,720940,720969,721186,721191,721194,721235,721387-721389,721470,721495,722068-722069,722463,722465,722467,722470,722825,723281,723346,723728,723784,724300,724387,725292,727376,727388,727390,727395,727397,727402,727492,727701,728022,731896,731934,731941,73234
 7,732678,732686,732689,732693,732703,732715-732716,732719,732728,732730,732734-732738,732740,732742-732743,732745,732867,732883,733057-733059,733061,733080,734092,734366,734375,734400,734709,735401,736021,736030,736274,736276,736650-736651,736653-736656,736658,736680,737695,738087,738119,738121,738419,738422,738474,738512,739210,739212,739226,740262,740734,740736-740738,740747,740749-740750,741052,741100,741121,741206,741208-741210,741213,741509,741524,741652,741803,742382,742538,743295,743713,743718,743726,743734,743738,744883-744884,744889,744895,744911,744935,744940,744954,744956,745041,745051,745053,745056,745060,745120,745500,745534,745824,745849,746301-746302,746486,746602-746603,746609,746666,746747-746748,746932,746946,747096,747325,747347,747358-747360,747362,747365,747368,747372,747785,747839,748065-748066,748232-748233,748243,748247,748486,749237,749448,749622,749953,749965,750008,750011,750437,750536,752036-752039,752044,752046-752051,752053-752054,752056,752058-
 752060,752063-752067,752115,752131,752414-752415,752457-752458,752478,752480,752543-752545,752640,752809,752831,752839-752841,753225-753228,753232,753244,753328,755512,755582,756378,756403,756405,756409,756429,756432,756442,756444-756445,757364,757698,757775-757776,757814,757854,757856,757862,758003,758193,758263,758265,758349,758354,758629,758632,758634-758636,758639-758642,758646,758649,758653-758654,758667,759880,759889,760386,760479,760876,760900,760906,760945,761267,761279-761280,761282-761283,761292,761634,761645,761690-761691,761715,762671,762675,762700,762702,762731,762737,762755,762780-762781,762789,762793,762797,762802,762804,762808,762813-762814,762817-762818,762821-762823,763146,763160,763188,763205,763215,763242,763244,763248,763617,765322,765328,765337,765532,765551,765554,765556,765585

Modified: jackrabbit/branches/1.5/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java?rev=767551&r1=767550&r2=767551&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java Wed Apr 22 14:49:48 2009
@@ -30,6 +30,8 @@
 import org.apache.jackrabbit.spi.Path;
 import org.apache.jackrabbit.spi.PathFactory;
 import org.apache.jackrabbit.spi.commons.name.PathFactoryImpl;
+import org.apache.jackrabbit.spi.commons.conversion.PathResolver;
+import org.apache.jackrabbit.spi.commons.conversion.DefaultNamePathResolver;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.lucene.document.Document;
@@ -373,21 +375,26 @@
      *
      * @param stateMgr the item state manager.
      * @param rootId   the id of the node from where to start.
+     * @param rootPath the path of the node from where to start.
      * @throws IOException           if an error occurs while indexing the
      *                               workspace.
      * @throws IllegalStateException if this index is not empty.
      */
-    void createInitialIndex(ItemStateManager stateMgr, NodeId rootId, Path rootPath)
+    void createInitialIndex(ItemStateManager stateMgr,
+                            NodeId rootId,
+                            Path rootPath)
             throws IOException {
         // only do an initial index if there are no indexes at all
         if (indexNames.size() == 0) {
             reindexing = true;
             try {
+                long count = 0;
                 // traverse and index workspace
                 executeAndLog(new Start(Action.INTERNAL_TRANSACTION));
                 NodeState rootState = (NodeState) stateMgr.getItemState(rootId);
-                createIndex(rootState, rootPath, stateMgr);
+                count = createIndex(rootState, rootPath, stateMgr, count);
                 executeAndLog(new Commit(getTransactionId()));
+                log.info("Created initial index for {} nodes", new Long(count));
                 scheduleFlushTask();
             } catch (Exception e) {
                 String msg = "Error indexing workspace";
@@ -1036,19 +1043,33 @@
      * <code>node</code>.
      *
      * @param node     the current NodeState.
+     * @param path     the path of the current node.
      * @param stateMgr the shared item state manager.
+     * @param count    the number of nodes already indexed.
+     * @return the number of nodes indexed so far.
      * @throws IOException         if an error occurs while writing to the
      *                             index.
      * @throws ItemStateException  if an node state cannot be found.
      * @throws RepositoryException if any other error occurs
      */
-    private void createIndex(NodeState node, Path path, ItemStateManager stateMgr)
+    private long createIndex(NodeState node,
+                             Path path,
+                             ItemStateManager stateMgr,
+                             long count)
             throws IOException, ItemStateException, RepositoryException {
         NodeId id = node.getNodeId();
         if (excludedIDs.contains(id)) {
-            return;
+            return count;
         }
         executeAndLog(new AddNode(getTransactionId(), id.getUUID()));
+        if (++count % 100 == 0) {
+            PathResolver resolver = new DefaultNamePathResolver(
+                    handler.getContext().getNamespaceRegistry());
+            log.info("indexing... {} ({})", resolver.getJCRPath(path), new Long(count));
+        }
+        if (count % 10 == 0) {
+            checkIndexingQueue(true);
+        }
         checkVolatileCommit();
         List children = node.getChildNodeEntries();
         for (Iterator it = children.iterator(); it.hasNext();) {
@@ -1063,9 +1084,10 @@
                         e, handler, path, node, child);
             }
             if (childState != null) {
-                createIndex(childState, childPath, stateMgr);
+                count = createIndex(childState, childPath, stateMgr, count);
             }
         }
+        return count;
     }
 
     /**
@@ -1173,10 +1195,27 @@
     }
 
     /**
-     * Checks the indexing queue for finished text extrator jobs and
-     * updates the index accordingly if there are any new ones.
+     * Checks the indexing queue for finished text extrator jobs and updates the
+     * index accordingly if there are any new ones. This method is synchronized
+     * and should only be called by the timer task that periodically checks if
+     * there are documents ready in the indexing queue. A new transaction is
+     * used when documents are transfered from the indexing queue to the index.
      */
     private synchronized void checkIndexingQueue() {
+        checkIndexingQueue(false);
+    }
+
+    /**
+     * Checks the indexing queue for finished text extrator jobs and updates the
+     * index accordingly if there are any new ones.
+     *
+     * @param transactionPresent whether a transaction is in progress and the
+     *                           current {@link #getTransactionId()} should be
+     *                           used. If <code>false</code> a new transaction
+     *                           is created when documents are transfered from
+     *                           the indexing queue to the index.
+     */
+    private void checkIndexingQueue(boolean transactionPresent) {
         Document[] docs = indexingQueue.getFinishedDocuments();
         Map finished = new HashMap();
         for (int i = 0; i < docs.length; i++) {
@@ -1186,18 +1225,28 @@
 
         // now update index with the remaining ones if there are any
         if (!finished.isEmpty()) {
-            log.debug("updating index with {} nodes from indexing queue.",
+            log.info("updating index with {} nodes from indexing queue.",
                     new Long(finished.size()));
 
             // remove documents from the queue
-            Iterator it = finished.keySet().iterator();
-            while (it.hasNext()) {
+            for (Iterator it = finished.keySet().iterator(); it.hasNext(); ) {
                 indexingQueue.removeDocument(it.next().toString());
             }
 
             try {
-                update(finished.keySet().iterator(),
+                if (transactionPresent) {
+                    for (Iterator it = finished.keySet().iterator(); it.hasNext(); ) {
+                        executeAndLog(new DeleteNode(getTransactionId(), (UUID) it.next()));
+                    }
+                    for (Iterator it = finished.values().iterator(); it.hasNext(); ) {
+                        executeAndLog(new AddNode(
+                                getTransactionId(), (Document) it.next()));
+                    }
+                } else {
+                    update(
+                        finished.keySet().iterator(),
                         finished.values().iterator());
+                }
             } catch (IOException e) {
                 // update failed
                 log.warn("Failed to update index with deferred text extraction", e);

Modified: jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java?rev=767551&r1=767550&r2=767551&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/AbstractIndexingTest.java Wed Apr 22 14:49:48 2009
@@ -25,13 +25,15 @@
  */
 public class AbstractIndexingTest extends AbstractQueryTest {
 
+    protected static final String WORKSPACE_NAME = "indexing-test";
+
     protected Session session;
 
     protected Node testRootNode;
 
     protected void setUp() throws Exception {
         super.setUp();
-        session = helper.getSuperuserSession("indexing-test");
+        session = helper.getSuperuserSession(WORKSPACE_NAME);
         testRootNode = cleanUpTestRoot(session);
         // overwrite query manager
         qm = session.getWorkspace().getQueryManager();

Modified: jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java?rev=767551&r1=767550&r2=767551&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java (original)
+++ jackrabbit/branches/1.5/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/IndexingQueueTest.java Wed Apr 22 14:49:48 2009
@@ -18,28 +18,38 @@
 
 import org.apache.jackrabbit.extractor.TextExtractor;
 import org.apache.jackrabbit.core.query.AbstractIndexingTest;
+import org.apache.jackrabbit.core.RepositoryImpl;
+import org.apache.jackrabbit.core.TestHelper;
+import org.apache.jackrabbit.core.fs.local.FileUtil;
 
 import javax.jcr.Node;
 import javax.jcr.NodeIterator;
+import javax.jcr.RepositoryException;
 import javax.jcr.query.Query;
 import java.io.Reader;
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FilenameFilter;
 import java.util.Calendar;
 
 /**
  * <code>IndexingQueueTest</code> checks if the indexing queue properly indexes
- * nodes in a background thread when text extraction takes more than 100 ms.
+ * nodes in a background thread when text extraction takes more than 10 ms. See
+ * the workspace.xml file for the indexing-test workspace.
  */
 public class IndexingQueueTest extends AbstractIndexingTest {
 
+    private static final File TEMP_DIR = new File(System.getProperty("java.io.tmpdir")); 
+
     private static final String CONTENT_TYPE = "application/indexing-queue-test";
 
     private static final String ENCODING = "UTF-8";
 
     public void testQueue() throws Exception {
+        Extractor.sleepTime = 200;
         SearchIndex index = (SearchIndex) getQueryHandler();
         IndexingQueue queue = index.getIndex().getIndexingQueue();
 
@@ -71,8 +81,115 @@
         assertTrue(nodes.hasNext());
     }
 
+    public void testInitialIndex() throws Exception {
+        Extractor.sleepTime = 200;
+        SearchIndex index = (SearchIndex) getQueryHandler();
+        File indexDir = new File(index.getPath());
+
+        // fill workspace
+        Node testFolder = testRootNode.addNode("folder", "nt:folder");
+        String text = "the quick brown fox jumps over the lazy dog.";
+        int num = createFiles(testFolder, text.getBytes(ENCODING), 10, 2, 0);
+        session.save();
+
+        // shutdown workspace
+        RepositoryImpl repo = (RepositoryImpl) session.getRepository();
+        session.logout();
+        session = null;
+        superuser.logout();
+        superuser = null;
+        TestHelper.shutdownWorkspace(WORKSPACE_NAME, repo);
+
+        // delete index
+        try {
+            FileUtil.delete(indexDir);
+        } catch (IOException e) {
+            fail("Unable to delete index directory");
+        }
+
+        int initialNumExtractorFiles = getNumExtractorFiles();
+
+        Extractor.sleepTime = 20;
+        Thread t = new Thread(new Runnable() {
+            public void run() {
+                try {
+                    session = helper.getSuperuserSession(WORKSPACE_NAME);
+                } catch (RepositoryException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        });
+        t.start();
+
+        while (t.isAlive()) {
+            // there must not be more than 20 extractor files, because:
+            // - initial index creation checks indexing queue every 10 nodes
+            // - there is an aggregate definition on the workspace that causes
+            //   2 extractor jobs per nt:resource
+            // => 2 * 10 = 20
+            int numFiles = getNumExtractorFiles() - initialNumExtractorFiles;
+            assertTrue(numFiles <= 20);
+            Thread.sleep(50);
+        }
+
+        qm = session.getWorkspace().getQueryManager();
+        index = (SearchIndex) getQueryHandler();
+        IndexingQueue queue = index.getIndex().getIndexingQueue();
+
+        // flush index to make sure any documents in the buffer are written
+        // to the index. this is to make sure all nodes are pushed either to
+        // the index or to the indexing queue
+        index.getIndex().flush();
+
+        synchronized (index.getIndex()) {
+            while (queue.getNumPendingDocuments() > 0) {
+                index.getIndex().wait(50);
+            }
+        }
+
+        String stmt = testPath + "//element(*, nt:resource)[jcr:contains(., 'fox')]";
+        Query q = qm.createQuery(stmt, Query.XPATH);
+        assertEquals(num, q.execute().getNodes().getSize());
+    }
+
+    private int createFiles(Node folder, byte[] data,
+                            int filesPerLevel, int levels, int count)
+            throws RepositoryException {
+        levels--;
+        for (int i = 0; i < filesPerLevel; i++) {
+            // create files
+            Node file = folder.addNode("file" + i, "nt:file");
+            InputStream in = new ByteArrayInputStream(data);
+            Node resource = file.addNode("jcr:content", "nt:resource");
+            resource.setProperty("jcr:data", in);
+            resource.setProperty("jcr:lastModified", Calendar.getInstance());
+            resource.setProperty("jcr:mimeType", CONTENT_TYPE);
+            resource.setProperty("jcr:encoding", ENCODING);
+            count++;
+        }
+        if (levels > 0) {
+            for (int i = 0; i < filesPerLevel; i++) {
+                // create files
+                Node subFolder = folder.addNode("folder" + i, "nt:folder");
+                count = createFiles(subFolder, data,
+                        filesPerLevel, levels, count);
+            }
+        }
+        return count;
+    }
+
+    private int getNumExtractorFiles() throws IOException {
+        return TEMP_DIR.listFiles(new FilenameFilter() {
+            public boolean accept(File dir, String name) {
+                return name.startsWith("extractor");
+            }
+        }).length;
+    }
+
     public static final class Extractor implements TextExtractor {
 
+        protected static volatile int sleepTime = 200;
+
         public String[] getContentTypes() {
             return new String[]{CONTENT_TYPE};
         }
@@ -80,7 +197,7 @@
         public Reader extractText(InputStream stream, String type, String encoding)
         throws IOException {
             try {
-                Thread.sleep(200);
+                Thread.sleep(sleepTime);
             } catch (InterruptedException e) {
                 throw new IOException();
             }

Modified: jackrabbit/branches/1.5/jackrabbit-core/src/test/repository/workspaces/indexing-test/workspace.xml
URL: http://svn.apache.org/viewvc/jackrabbit/branches/1.5/jackrabbit-core/src/test/repository/workspaces/indexing-test/workspace.xml?rev=767551&r1=767550&r2=767551&view=diff
==============================================================================
--- jackrabbit/branches/1.5/jackrabbit-core/src/test/repository/workspaces/indexing-test/workspace.xml (original)
+++ jackrabbit/branches/1.5/jackrabbit-core/src/test/repository/workspaces/indexing-test/workspace.xml Wed Apr 22 14:49:48 2009
@@ -41,6 +41,7 @@
     <param name="excerptProviderClass" value="org.apache.jackrabbit.core.query.lucene.WeightedHTMLExcerpt"/>
     <param name="textFilterClasses" value="org.apache.jackrabbit.extractor.PlainTextExtractor,org.apache.jackrabbit.core.query.lucene.IndexingQueueTest$Extractor"/>
     <param name="extractorPoolSize" value="2"/>
+    <param name="extractorTimeout" value="10"/>
   </SearchIndex>
 </Workspace>