You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/06/01 17:30:16 UTC
svn commit: r1488537 [1/3] - in /manifoldcf/branches/CONNECTORS-694:
connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/
connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/go...
Author: kwright
Date: Sat Jun 1 15:30:15 2013
New Revision: 1488537
URL: http://svn.apache.org/r1488537
Log:
Revise connector to have at least decent threading.
Added:
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/googledrive/common_ja_JP.properties (with props)
manifoldcf/branches/CONNECTORS-694/framework/core/src/main/java/org/apache/manifoldcf/core/common/XThreadStringBuffer.java (with props)
Removed:
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/googledrive/common_en_US.properties
manifoldcf/branches/CONNECTORS-694/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/PageBuffer.java
Modified:
manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java (contents, props changed)
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java (contents, props changed)
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveSession.java (contents, props changed)
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/Messages.java (contents, props changed)
manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/googledrive/common_en_US.properties
manifoldcf/branches/CONNECTORS-694/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
manifoldcf/branches/CONNECTORS-694/framework/core/src/main/java/org/apache/manifoldcf/core/common/XThreadInputStream.java
Modified: manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java?rev=1488537&r1=1488536&r2=1488537&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java Sat Jun 1 15:30:15 2013
@@ -17,12 +17,10 @@
* limitations under the License.
*/
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.manifoldcf.crawler.connectors.dropbox;
+import org.apache.manifoldcf.core.common.*;
+
import com.dropbox.client2.DropboxAPI;
import com.dropbox.client2.exception.DropboxException;
import java.io.IOException;
@@ -674,20 +672,24 @@ public class DropboxRepositoryConnector
i++;
}
- HashSet<String> seeds = getSeeds(dropboxPath);
- for (String seed : seeds) {
- activities.addSeedDocument(seed);
- }
-
- }
-
- protected HashSet<String> getSeeds(String path)
- throws ManifoldCFException, ServiceInterruption {
getSession();
- GetSeedsThread t = new GetSeedsThread(path);
+ XThreadStringBuffer seedBuffer = new XThreadStringBuffer();
+ GetSeedsThread t = new GetSeedsThread(dropboxPath, seedBuffer);
try {
t.start();
+
+ // Pick up the paths, and add them to the activities, before we join with the child thread.
+ while (true) {
+ // The only kinds of exceptions this can throw are ones that shut the process down.
+ String docPath = seedBuffer.fetch();
+ if (docPath == null)
+ break;
+ // Add the document path to the queue as a seed
+ activities.addSeedDocument(docPath);
+ }
+
t.join();
+
Throwable thr = t.getException();
if (thr != null) {
if (thr instanceof DropboxException) {
@@ -705,35 +707,34 @@ public class DropboxRepositoryConnector
} catch (DropboxException e) {
Logging.connectors.error("DROPBOX: Error adding seed documents: " + e.getMessage(), e);
handleDropboxException(e);
+ } finally {
+ // Make SURE buffer is dead, otherwise child thread may well hang waiting on it
+ seedBuffer.abandon();
}
- return t.getResponse();
}
protected class GetSeedsThread extends Thread {
protected Throwable exception = null;
- protected HashSet<String> response = null;
- protected String path = null;
+ protected final String path;
+ protected final XThreadStringBuffer seedBuffer;
- public GetSeedsThread(String path) {
+ public GetSeedsThread(String path, XThreadStringBuffer seedBuffer) {
super();
- this.path=path;
+ this.path = path;
+ this.seedBuffer = seedBuffer;
setDaemon(true);
}
@Override
public void run() {
try {
- response = session.getSeeds(path,25000); //upper limit on files to get supported by dropbox api in a single directory
+ session.getSeeds(seedBuffer,path,25000); //upper limit on files to get supported by dropbox api in a single directory
} catch (Throwable e) {
this.exception = e;
}
}
- public HashSet<String> getResponse() {
- return response;
- }
-
public Throwable getException() {
return exception;
}
Modified: manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java?rev=1488537&r1=1488536&r2=1488537&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java (original)
+++ manifoldcf/branches/CONNECTORS-694/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java Sat Jun 1 15:30:15 2013
@@ -23,6 +23,8 @@
*/
package org.apache.manifoldcf.crawler.connectors.dropbox;
+import org.apache.manifoldcf.core.common.*;
+
import com.dropbox.client2.session.AppKeyPair;
import java.util.Map;
import com.dropbox.client2.session.WebAuthSession;
@@ -73,23 +75,22 @@ public class DropboxSession {
return info;
}
- public HashSet<String> getSeeds(String path, int max_dirs) throws DropboxException {
- HashSet<String> ids = new HashSet<String>();
+ public void getSeeds(XThreadStringBuffer idBuffer, String path, int max_dirs)
+ throws DropboxException, InterruptedException {
- ids.add(path); //need to add root dir so that single files such as /file1 will still get read
+ idBuffer.add(path); //need to add root dir so that single files such as /file1 will still get read
- DropboxAPI.Entry root_entry = client.metadata(path, max_dirs, null, true, null);
- List<DropboxAPI.Entry> entries = root_entry.contents; //gets a list of the contents of the entire folder: subfolders + files
+ DropboxAPI.Entry root_entry = client.metadata(path, max_dirs, null, true, null);
+ List<DropboxAPI.Entry> entries = root_entry.contents; //gets a list of the contents of the entire folder: subfolders + files
- // Apply the entries one by one.
- for (DropboxAPI.Entry e : entries) {
- if (e.isDir) { //only add the directories as seeds, we'll add the files later
- ids.add(e.path);
- }
- }
- return ids;
+ // Apply the entries one by one.
+ for (DropboxAPI.Entry e : entries) {
+ if (e.isDir) { //only add the directories as seeds, we'll add the files later
+ idBuffer.add(e.path);
+ }
}
+ }
public DropboxAPI.Entry getObject(String id) throws DropboxException {
return client.metadata(id, 25000, null, true, null);
Modified: manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java?rev=1488537&r1=1488536&r2=1488537&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java Sat Jun 1 15:30:15 2013
@@ -1,19 +1,34 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package org.apache.manifoldcf.crawler.connectors.googledrive;
-
-/**
- *
- * @author andrew
- */
-public class GoogleDriveConfig {
-
- public static final String CLIENT_ID_PARAM = "clientid";
- public static final String CLIENT_SECRET_PARAM = "clientsecret";
- public static final String REFRESH_TOKEN_PARAM = "refreshtoken";
- public static final String REPOSITORY_ID_DEFAULT_VALUE = "googledrive";
- public static final String GOOGLEDRIVE_QUERY_PARAM = "googledriveQuery";
- public static final String GOOGLEDRIVE_QUERY_DEFAULT = "mimeType='application/vnd.google-apps.folder' and trashed=false";
-}
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.crawler.connectors.googledrive;
+
+/**
+ *
+ * @author andrew
+ */
+public class GoogleDriveConfig {
+
+ public static final String CLIENT_ID_PARAM = "clientid";
+ public static final String CLIENT_SECRET_PARAM = "clientsecret";
+ public static final String REFRESH_TOKEN_PARAM = "refreshtoken";
+ public static final String REPOSITORY_ID_DEFAULT_VALUE = "googledrive";
+ public static final String GOOGLEDRIVE_QUERY_PARAM = "googledriveQuery";
+ public static final String GOOGLEDRIVE_QUERY_DEFAULT = "mimeType='application/vnd.google-apps.folder' and trashed=false";
+}
Propchange: manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/branches/CONNECTORS-694/connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveConfig.java
------------------------------------------------------------------------------
svn:keywords = Id