You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/06/01 17:30:16 UTC

svn commit: r1488537 [1/3] - in /manifoldcf/branches/CONNECTORS-694: connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/ connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/go...

Author: kwright
Date: Sat Jun  1 15:30:15 2013
New Revision: 1488537

URL: http://svn.apache.org/r1488537
Log:
Revise connector to have at least decent threading.

Added:
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/googledrive/common_ja_JP.properties   (with props)
    manifoldcf/branches/CONNECTORS-694/framework/core/src/main/java/org/apache/manifoldcf/core/common/XThreadStringBuffer.java   (with props)
Removed:
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/googledrive/common_en_US.properties
    manifoldcf/branches/CONNECTORS-694/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/PageBuffer.java
Modified:
    manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
    manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java   (contents, props changed)
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java   (contents, props changed)
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveSession.java   (contents, props changed)
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/Messages.java   (contents, props changed)
    manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/googledrive/common_en_US.properties
    manifoldcf/branches/CONNECTORS-694/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
    manifoldcf/branches/CONNECTORS-694/framework/core/src/main/java/org/apache/manifoldcf/core/common/XThreadInputStream.java

Modified: manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java?rev=1488537&r1=1488536&r2=1488537&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java Sat Jun  1 15:30:15 2013
@@ -17,12 +17,10 @@
 * limitations under the License.
 */
 
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
 package org.apache.manifoldcf.crawler.connectors.dropbox;
 
+import org.apache.manifoldcf.core.common.*;
+
 import com.dropbox.client2.DropboxAPI;
 import com.dropbox.client2.exception.DropboxException;
 import java.io.IOException;
@@ -674,20 +672,24 @@ public class DropboxRepositoryConnector 
       i++;
     }
     
-    HashSet<String> seeds = getSeeds(dropboxPath);
-    for (String seed : seeds) {
-      activities.addSeedDocument(seed);
-    }
-
-  }
-
-  protected HashSet<String> getSeeds(String path)
-    throws ManifoldCFException, ServiceInterruption {
     getSession();
-    GetSeedsThread t = new GetSeedsThread(path);
+    XThreadStringBuffer seedBuffer = new XThreadStringBuffer();
+    GetSeedsThread t = new GetSeedsThread(dropboxPath, seedBuffer);
     try {
       t.start();
+      
+      // Pick up the paths, and add them to the activities, before we join with the child thread.
+      while (true) {
+        // The only kinds of exception this can throw are ones that shut the process down.
+        String docPath = seedBuffer.fetch();
+        if (docPath ==  null)
+          break;
+        // Add the document path as a seed document
+        activities.addSeedDocument(docPath);
+      }
+
       t.join();
+
       Throwable thr = t.getException();
       if (thr != null) {
         if (thr instanceof DropboxException) {
@@ -705,35 +707,34 @@ public class DropboxRepositoryConnector 
     } catch (DropboxException e) {
       Logging.connectors.error("DROPBOX: Error adding seed documents: " + e.getMessage(), e);
       handleDropboxException(e);
+    } finally {
+      // Make SURE buffer is dead, otherwise child thread may well hang waiting on it
+      seedBuffer.abandon();
     }
-    return t.getResponse();
   }
 
   protected class GetSeedsThread extends Thread {
 
     protected Throwable exception = null;
-    protected HashSet<String> response = null;
-    protected String path = null;
+    protected final String path;
+    protected final XThreadStringBuffer seedBuffer;
     
-    public GetSeedsThread(String path) {
+    public GetSeedsThread(String path, XThreadStringBuffer seedBuffer) {
       super();
-      this.path=path;
+      this.path = path;
+      this.seedBuffer = seedBuffer;
       setDaemon(true);
     }
 
     @Override
     public void run() {
       try {
-        response = session.getSeeds(path,25000); //upper limit on files to get supported by dropbox api in a single directory
+        session.getSeeds(seedBuffer,path,25000); //upper limit on files to get supported by dropbox api in a single directory
       } catch (Throwable e) {
         this.exception = e;
       }
     }
 
-    public HashSet<String> getResponse() {
-      return response;
-    }
-    
     public Throwable getException() {
       return exception;
     }

Modified: manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java?rev=1488537&r1=1488536&r2=1488537&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java (original)
+++ manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java Sat Jun  1 15:30:15 2013
@@ -23,6 +23,8 @@
  */
 package org.apache.manifoldcf.crawler.connectors.dropbox;
 
+import org.apache.manifoldcf.core.common.*;
+
 import com.dropbox.client2.session.AppKeyPair;
 import java.util.Map;
 import com.dropbox.client2.session.WebAuthSession;
@@ -73,23 +75,22 @@ public class DropboxSession {
     return info;
   }
 
-    public HashSet<String> getSeeds(String path, int max_dirs) throws DropboxException {
-        HashSet<String> ids = new HashSet<String>();
+  public void getSeeds(XThreadStringBuffer idBuffer, String path, int max_dirs)
+    throws DropboxException, InterruptedException {
 
-        ids.add(path); //need to add root dir so that single files such as /file1 will still get read
+    idBuffer.add(path); //need to add root dir so that single files such as /file1 will still get read
         
         
-        DropboxAPI.Entry root_entry = client.metadata(path, max_dirs, null, true, null);
-        List<DropboxAPI.Entry> entries = root_entry.contents; //gets a list of the contents of the entire folder: subfolders + files
+    DropboxAPI.Entry root_entry = client.metadata(path, max_dirs, null, true, null);
+    List<DropboxAPI.Entry> entries = root_entry.contents; //gets a list of the contents of the entire folder: subfolders + files
 
-        // Apply the entries one by one.
-        for (DropboxAPI.Entry e : entries) {
-            if (e.isDir) { //only add the directories as seeds, we'll add the files later
-                ids.add(e.path);
-            }
-        }
-        return ids;
+    // Apply the entries one by one.
+    for (DropboxAPI.Entry e : entries) {
+      if (e.isDir) { //only add the directories as seeds, we'll add the files later
+        idBuffer.add(e.path);
+      }
     }
+  }
   
   public DropboxAPI.Entry getObject(String id) throws DropboxException {
     return client.metadata(id, 25000, null, true, null);

Modified: manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java?rev=1488537&r1=1488536&r2=1488537&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java Sat Jun  1 15:30:15 2013
@@ -1,19 +1,34 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package org.apache.manifoldcf.crawler.connectors.googledrive;
-
-/**
- *
- * @author andrew
- */
-public class GoogleDriveConfig {
-
-    public static final String CLIENT_ID_PARAM = "clientid";
-    public static final String CLIENT_SECRET_PARAM = "clientsecret";
-    public static final String REFRESH_TOKEN_PARAM = "refreshtoken";
-    public static final String REPOSITORY_ID_DEFAULT_VALUE = "googledrive";
-    public static final String GOOGLEDRIVE_QUERY_PARAM = "googledriveQuery";
-    public static final String GOOGLEDRIVE_QUERY_DEFAULT = "mimeType='application/vnd.google-apps.folder' and trashed=false";
-}
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.crawler.connectors.googledrive;
+
+/**
+ *
+ * @author andrew
+ */
+public class GoogleDriveConfig {
+
+  public static final String CLIENT_ID_PARAM = "clientid";
+  public static final String CLIENT_SECRET_PARAM = "clientsecret";
+  public static final String REFRESH_TOKEN_PARAM = "refreshtoken";
+  public static final String REPOSITORY_ID_DEFAULT_VALUE = "googledrive";
+  public static final String GOOGLEDRIVE_QUERY_PARAM = "googledriveQuery";
+  public static final String GOOGLEDRIVE_QUERY_DEFAULT = "mimeType='application/vnd.google-apps.folder' and trashed=false";
+}

Propchange: manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java
------------------------------------------------------------------------------
    svn:keywords = Id