You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2013/04/22 16:41:36 UTC
svn commit: r1470550 [1/2] - in /manifoldcf/branches/CONNECTORS-676: ./
connectors/dropbox/
connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/
dist-license/ framework/core/src/main/java/org/apache/manifoldcf/co...
Author: kwright
Date: Mon Apr 22 14:41:36 2013
New Revision: 1470550
URL: http://svn.apache.org/r1470550
Log:
Various changes, including reformatting and getting the thread structure correct.
Modified:
manifoldcf/branches/CONNECTORS-676/build.xml
manifoldcf/branches/CONNECTORS-676/connectors/dropbox/build.xml
manifoldcf/branches/CONNECTORS-676/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
manifoldcf/branches/CONNECTORS-676/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxSession.java
manifoldcf/branches/CONNECTORS-676/dist-license/LICENSE.txt
manifoldcf/branches/CONNECTORS-676/framework/core/src/main/java/org/apache/manifoldcf/core/common/XThreadInputStream.java
Modified: manifoldcf/branches/CONNECTORS-676/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-676/build.xml?rev=1470550&r1=1470549&r2=1470550&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-676/build.xml (original)
+++ manifoldcf/branches/CONNECTORS-676/build.xml Mon Apr 22 14:41:36 2013
@@ -1427,7 +1427,7 @@
</antcall>
<antcall target="general-add-repository-connector">
<param name="connector-name" value="dropbox"/>
- <param name="connector-label" value="dropbox"/>
+ <param name="connector-label" value="DropBox"/>
<param name="connector-class" value="org.apache.manifoldcf.crawler.connectors.dropbox.DropboxRepositoryConnector"/>
</antcall>
</target>
Modified: manifoldcf/branches/CONNECTORS-676/connectors/dropbox/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-676/connectors/dropbox/build.xml?rev=1470550&r1=1470549&r2=1470550&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-676/connectors/dropbox/build.xml (original)
+++ manifoldcf/branches/CONNECTORS-676/connectors/dropbox/build.xml Mon Apr 22 14:41:36 2013
@@ -23,7 +23,7 @@
<path refid="mcf-connector-build.connector-classpath"/>
<fileset dir="../../lib">
<include name="dropbox-client*.jar"/>
- <include name="json_simple*.jar"/>
+ <include name="json-simple*.jar"/>
</fileset>
</path>
@@ -32,7 +32,7 @@
<copy todir="dist/lib">
<fileset dir="../../lib">
<include name="dropbox*.jar"/>
- <include name="json_simple*.jar"/>
+ <include name="json-simple*.jar"/>
</fileset>
</copy>
</target>
Modified: manifoldcf/branches/CONNECTORS-676/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-676/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java?rev=1470550&r1=1470549&r2=1470550&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-676/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-676/connectors/dropbox/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/dropbox/DropboxRepositoryConnector.java Mon Apr 22 14:41:36 2013
@@ -28,8 +28,6 @@ import com.dropbox.client2.exception.Dro
import java.io.IOException;
import java.io.InputStream;
import java.io.InterruptedIOException;
-import java.rmi.NotBoundException;
-import java.rmi.RemoteException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -40,6 +38,7 @@ import org.apache.manifoldcf.crawler.con
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.core.interfaces.ConfigParams;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.common.XThreadInputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -57,987 +56,1158 @@ import org.apache.log4j.Logger;
*/
public class DropboxRepositoryConnector extends BaseRepositoryConnector {
- protected final static String ACTIVITY_READ = "read document";
- public final static String ACTIVITY_FETCH = "fetch";
- protected static final String RELATIONSHIP_CHILD = "child";
- private static final String JOB_STARTPOINT_NODE_TYPE = "startpoint";
- private static final String DROPBOX_SERVER_TAB_PROPERTY = "DropboxRepositoryConnector.Server";
- private static final String DROPBOX_QUERY_TAB_PROPERTY = "DropboxRepositoryConnector.DROPBOXConfig";
- // Template names
- /**
- * Forward to the javascript to check the configuration parameters
- */
- private static final String EDIT_CONFIG_HEADER_FORWARD = "editConfiguration.js";
- /**
- * Server tab template
- */
- private static final String EDIT_CONFIG_FORWARD_SERVER = "editConfiguration_Server.html";
- /**
- * Forward to the javascript to check the specification parameters for the
- * job
- */
- private static final String EDIT_SPEC_HEADER_FORWARD = "editSpecification.js";
- /**
- * Forward to the template to edit the configuration parameters for the job
- */
- private static final String EDIT_SPEC_FORWARD_DROPBOXQUERY = "editSpecification_DROPBOXQuery.html";
- /**
- * Forward to the HTML template to view the configuration parameters
- */
- private static final String VIEW_CONFIG_FORWARD = "viewConfiguration.html";
- /**
- * Forward to the template to view the specification parameters for the job
- */
- private static final String VIEW_SPEC_FORWARD = "viewSpecification.html";
- /**
- * Endpoint server name
- */
- protected String server = "dropbox";
- protected DropboxSession session = null;
- protected long lastSessionFetch = -1L;
- protected static final long timeToRelease = 300000L;
-
- protected String app_key = null;
- protected String app_secret = null;
- protected String key = null;
- protected String secret = null;
- protected String path = null;
- protected Map<String, String> parameters = new HashMap<String, String>();
-
- public DropboxRepositoryConnector() {
- super();
- }
-
- /**
- * Return the list of activities that this connector supports (i.e. writes
- * into the log).
- *
- * @return the list.
- */
- @Override
- public String[] getActivitiesList() {
- return new String[]{ACTIVITY_FETCH, ACTIVITY_READ};
+ protected final static String ACTIVITY_READ = "read document";
+ public final static String ACTIVITY_FETCH = "fetch";
+ protected static final String RELATIONSHIP_CHILD = "child";
+ private static final String JOB_STARTPOINT_NODE_TYPE = "startpoint";
+ private static final String DROPBOX_SERVER_TAB_PROPERTY = "DropboxRepositoryConnector.Server";
+ private static final String DROPBOX_QUERY_TAB_PROPERTY = "DropboxRepositoryConnector.DROPBOXConfig";
+ // Template names
+ /**
+ * Forward to the javascript to check the configuration parameters
+ */
+ private static final String EDIT_CONFIG_HEADER_FORWARD = "editConfiguration.js";
+ /**
+ * Server tab template
+ */
+ private static final String EDIT_CONFIG_FORWARD_SERVER = "editConfiguration_Server.html";
+ /**
+ * Forward to the javascript to check the specification parameters for the
+ * job
+ */
+ private static final String EDIT_SPEC_HEADER_FORWARD = "editSpecification.js";
+ /**
+ * Forward to the template to edit the configuration parameters for the job
+ */
+ private static final String EDIT_SPEC_FORWARD_DROPBOXQUERY = "editSpecification_DROPBOXQuery.html";
+ /**
+ * Forward to the HTML template to view the configuration parameters
+ */
+ private static final String VIEW_CONFIG_FORWARD = "viewConfiguration.html";
+ /**
+ * Forward to the template to view the specification parameters for the job
+ */
+ private static final String VIEW_SPEC_FORWARD = "viewSpecification.html";
+ /**
+ * Endpoint server name
+ */
+ protected String server = "dropbox";
+ protected DropboxSession session = null;
+ protected long lastSessionFetch = -1L;
+ protected static final long timeToRelease = 300000L;
+
+ protected String app_key = null;
+ protected String app_secret = null;
+ protected String key = null;
+ protected String secret = null;
+ protected String path = null;
+
+ public DropboxRepositoryConnector() {
+ super();
+ }
+
+ /**
+ * Return the list of activities that this connector supports (i.e. writes
+ * into the log).
+ *
+ * @return the list.
+ */
+ @Override
+ public String[] getActivitiesList() {
+ return new String[]{ACTIVITY_FETCH, ACTIVITY_READ};
+ }
+
+ /**
+ * Get the bin name strings for a document identifier. The bin name
+ * describes the queue to which the document will be assigned for throttling
+ * purposes. Throttling controls the rate at which items in a given queue
+ * are fetched; it does not say anything about the overall fetch rate, which
+ * may operate on multiple queues or bins. For example, if you implement a
+ * web crawler, a good choice of bin name would be the server name, since
+ * that is likely to correspond to a real resource that will need real
+ * throttle protection.
+ *
+ * @param documentIdentifier is the document identifier.
+ * @return the set of bin names. If an empty array is returned, it is
+ * equivalent to there being no request rate throttling available for this
+ * identifier.
+ */
+ @Override
+ public String[] getBinNames(String documentIdentifier) {
+ return new String[]{server};
+ }
+
+ /**
+ * Close the connection. Call this before discarding the connection.
+ */
+ @Override
+ public void disconnect() throws ManifoldCFException {
+ if (session != null) {
+ session.close();
+ session = null;
+ lastSessionFetch = -1L;
+ }
+
+ app_key = null;
+ app_secret= null;
+ key = null;
+ secret = null;
+ path = null;
+ }
+
+ /**
+ * This method creates a new DROPBOX session for a DROPBOX repository. If the
+ * repositoryId is not provided in the configuration, the connector will
+ * retrieve all the repositories exposed for this endpoint, then it will start
+ * to use the first one.
+ *
+ * @param configParams is the set of configuration parameters, which in
+ * this case describe the target appliance, basic auth configuration, etc.
+ * (This formerly came out of the ini file.)
+ */
+ @Override
+ public void connect(ConfigParams configParams) {
+ super.connect(configParams);
+
+ app_key=params.getParameter(DropboxConfig.APP_KEY_PARAM);
+ app_secret=params.getParameter(DropboxConfig.APP_SECRET_PARAM);
+ key = params.getParameter(DropboxConfig.KEY_PARAM);
+ secret = params.getParameter(DropboxConfig.SECRET_PARAM);
+ path = params.getParameter(DropboxConfig.PATH_PARAM);
+ }
+
+ /**
+ * Test the connection. Returns a string describing the connection
+ * integrity.
+ *
+ * @return the connection's status as a displayable string.
+ */
+ @Override
+ public String check() throws ManifoldCFException {
+ try {
+ checkConnection();
+ return super.check();
+ } catch (ServiceInterruption e) {
+ return "Connection temporarily failed: " + e.getMessage();
+ } catch (ManifoldCFException e) {
+ return "Connection failed: " + e.getMessage();
+ }
+ }
+
+ protected void checkConnection()
+ throws ManifoldCFException, ServiceInterruption {
+ getSession();
+ CheckConnectionThread t = new CheckConnectionThread();
+ try {
+ t.start();
+ t.join();
+ Throwable thr = t.getException();
+ if (thr != null) {
+ if (thr instanceof DropboxException) {
+ throw (DropboxException) thr;
+ } else if (thr instanceof RuntimeException) {
+ throw (RuntimeException) thr;
+ } else {
+ throw (Error) thr;
+ }
+ }
+ return;
+ } catch (InterruptedException e) {
+ t.interrupt();
+ throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ } catch (DropboxException e) {
+ Logging.connectors.warn("DROPBOX: Error checking repository: " + e.getMessage(), e);
+ handleDropboxException(e);
+ }
+ }
+
+ protected class CheckConnectionThread extends Thread {
+
+ protected Throwable exception = null;
+
+ public CheckConnectionThread() {
+ super();
+ setDaemon(true);
+ }
+
+ public void run() {
+ try {
+ session.getRepositoryInfo();
+ } catch (Throwable e) {
+ this.exception = e;
+ }
+ }
+
+ public Throwable getException() {
+ return exception;
+ }
+ }
+
+ /**
+ * Set up a session
+ */
+ protected void getSession() throws ManifoldCFException, ServiceInterruption {
+ if (session == null) {
+ // Check for parameter validity
+
+ if (StringUtils.isEmpty(app_key)) {
+ throw new ManifoldCFException("Parameter " + DropboxConfig.APP_KEY_PARAM
+ + " required but not set");
+ }
+
+ if (StringUtils.isEmpty(app_secret)) {
+ throw new ManifoldCFException("Parameter " + DropboxConfig.APP_SECRET_PARAM
+ + " required but not set");
+ }
+
+
+ if (StringUtils.isEmpty(key)) {
+ throw new ManifoldCFException("Parameter " + DropboxConfig.KEY_PARAM
+ + " required but not set");
+ }
+
+ if (Logging.connectors.isDebugEnabled()) {
+ Logging.connectors.debug("DROPBOX: Username = '" + key + "'");
+ }
+
+ if (StringUtils.isEmpty(secret)) {
+ throw new ManifoldCFException("Parameter " + DropboxConfig.SECRET_PARAM
+ + " required but not set");
+ }
+
+ Logging.connectors.debug("DROPBOX: Password exists");
+
+ if (StringUtils.isEmpty(path)) {
+ throw new ManifoldCFException("Parameter " + DropboxConfig.PATH_PARAM
+ + " required but not set");
+ }
+
+ // Create a session
+ Map<String, String> parameters = new HashMap<String, String>();
+
+ // user credentials
+ parameters.put(DropboxConfig.APP_KEY_PARAM, app_key);
+ parameters.put(DropboxConfig.APP_SECRET_PARAM, app_secret);
+
+ parameters.put(DropboxConfig.KEY_PARAM, key);
+ parameters.put(DropboxConfig.SECRET_PARAM, secret);
+
+ session = new DropboxSession(parameters);
+ lastSessionFetch = System.currentTimeMillis();
+ }
+ }
+
+ @Override
+ public void poll() throws ManifoldCFException {
+ if (lastSessionFetch == -1L) {
+ return;
+ }
+
+ long currentTime = System.currentTimeMillis();
+ if (currentTime >= lastSessionFetch + timeToRelease) {
+ session.close();
+ session = null;
+ lastSessionFetch = -1L;
+ }
+
+ }
+
+ /**
+ * Get the maximum number of documents to amalgamate together into one
+ * batch, for this connector.
+ *
+ * @return the maximum number. 0 indicates "unlimited".
+ */
+ @Override
+ public int getMaxDocumentRequest() {
+ return 1;
+ }
+
+ /**
+ * Return the list of relationship types that this connector recognizes.
+ *
+ * @return the list.
+ */
+ @Override
+ public String[] getRelationshipTypes() {
+ return new String[]{RELATIONSHIP_CHILD};
+ }
+
+ /**
+ * Fill in a Server tab configuration parameter map for calling a Velocity
+ * template.
+ *
+ * @param newMap is the map to fill in
+ * @param parameters is the current set of configuration parameters
+ */
+ private static void fillInServerConfigurationMap(Map<String, String> newMap, ConfigParams parameters) {
+
+ String app_key = parameters.getParameter(DropboxConfig.APP_KEY_PARAM);
+ String app_secret = parameters.getParameter(DropboxConfig.APP_SECRET_PARAM);
+
+ String username = parameters.getParameter(DropboxConfig.KEY_PARAM);
+ String password = parameters.getParameter(DropboxConfig.SECRET_PARAM);
+ String path = parameters.getParameter(DropboxConfig.PATH_PARAM);
+
+ if (app_key == null) {
+ app_key = StringUtils.EMPTY;
}
-
- /**
- * Get the bin name strings for a document identifier. The bin name
- * describes the queue to which the document will be assigned for throttling
- * purposes. Throttling controls the rate at which items in a given queue
- * are fetched; it does not say anything about the overall fetch rate, which
- * may operate on multiple queues or bins. For example, if you implement a
- * web crawler, a good choice of bin name would be the server name, since
- * that is likely to correspond to a real resource that will need real
- * throttle protection.
- *
- * @param documentIdentifier is the document identifier.
- * @return the set of bin names. If an empty array is returned, it is
- * equivalent to there being no request rate throttling available for this
- * identifier.
- */
- @Override
- public String[] getBinNames(String documentIdentifier) {
- return new String[]{server};
+
+ if (app_secret == null) {
+ app_secret = StringUtils.EMPTY;
}
-
- /**
- * Close the connection. Call this before discarding the connection.
- */
- @Override
- public void disconnect() throws ManifoldCFException {
- if (session != null) {
- DestroySessionThread t = new DestroySessionThread();
- try {
- t.start();
- t.join();
- Throwable thr = t.getException();
- if (thr != null) {
- if (thr instanceof RemoteException) {
- throw (RemoteException) thr;
- } else {
- throw (Error) thr;
- }
- }
- session = null;
- lastSessionFetch = -1L;
- } catch (InterruptedException e) {
- t.interrupt();
- throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- } catch (RemoteException e) {
- Throwable e2 = e.getCause();
- if (e2 instanceof InterruptedException
- || e2 instanceof InterruptedIOException) {
-
- throw new ManifoldCFException(e2.getMessage(), e2,
- ManifoldCFException.INTERRUPTED);
- }
-
- session = null;
- lastSessionFetch = -1L;
-
- // Treat this as a transient problem
-
- Logging.connectors.warn(
- "DROPBOX: Transient remote exception closing session: "
- + e.getMessage(), e);
- }
-
- }
-
- app_key = null;
- app_secret= null;
- key = null;
- secret = null;
- path = null;
- }
-
- /**
- * This method create a new DROPBOX session for a DROPBOX repository, if the
- * repositoryId is not provided in the configuration, the connector will
- * retrieve all the repositories exposed for this endpoint the it will start
- * to use the first one.
- *
- * @param configParameters is the set of configuration parameters, which in
- * this case describe the target appliance, basic auth configuration, etc.
- * (This formerly came out of the ini file.)
- */
- @Override
- public void connect(ConfigParams configParams) {
- super.connect(configParams);
-
- app_key=params.getParameter(DropboxConfig.APP_KEY_PARAM);
- app_secret=params.getParameter(DropboxConfig.APP_SECRET_PARAM);
- key = params.getParameter(DropboxConfig.KEY_PARAM);
- secret = params.getParameter(DropboxConfig.SECRET_PARAM);
- path = params.getParameter(DropboxConfig.PATH_PARAM);
- }
-
- /**
- * Test the connection. Returns a string describing the connection
- * integrity.
- *
- * @return the connection's status as a displayable string.
- */
- @Override
- public String check() throws ManifoldCFException {
- try {
- checkConnection();
- return super.check();
- } catch (ServiceInterruption e) {
- return "Connection temporarily failed: " + e.getMessage();
- } catch (ManifoldCFException e) {
- return "Connection failed: " + e.getMessage();
- }
+
+ if (username == null) {
+ username = StringUtils.EMPTY;
}
-
- protected class DestroySessionThread extends Thread {
-
- protected Throwable exception = null;
-
- public DestroySessionThread() {
- super();
- setDaemon(true);
- }
-
- public void run() {
- try {
- session = null;
- } catch (Throwable e) {
- this.exception = e;
- }
- }
-
- public Throwable getException() {
- return exception;
- }
+ if (password == null) {
+ password = StringUtils.EMPTY;
}
+ if (path == null) {
+ path = DropboxConfig.PATH_DEFAULT_VALUE;
+ }
+
+ newMap.put(DropboxConfig.APP_KEY_PARAM, app_key);
+ newMap.put(DropboxConfig.APP_SECRET_PARAM, app_secret);
+ newMap.put(DropboxConfig.KEY_PARAM, username);
+ newMap.put(DropboxConfig.SECRET_PARAM, password);
+ newMap.put(DropboxConfig.PATH_PARAM, path);
+ }
+
+ /**
+ * View configuration. This method is called in the body section of the
+ * connector's view configuration page. Its purpose is to present the
+ * connection information to the user. The coder can presume that the HTML
+ * that is output from this configuration will be within appropriate <html>
+ * and <body> tags.
+ *
+ * @param threadContext is the local thread context.
+ * @param out is the output to which any HTML should be sent.
+ * @param parameters are the configuration parameters, as they currently
+ * exist, for this connection being configured.
+ */
+ @Override
+ public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
+ Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
+ Map<String, String> paramMap = new HashMap<String, String>();
+
+ // Fill in map from each tab
+ fillInServerConfigurationMap(paramMap, parameters);
+
+ outputResource(VIEW_CONFIG_FORWARD, out, locale, paramMap);
+ }
+
+ /**
+ * Read the content of a resource, replace each ${PARAMNAME} variable with
+ * its value, and copy the result to the output.
+ *
+ * @param resName
+ * @param out
+ * @throws ManifoldCFException
+ */
+ private static void outputResource(String resName, IHTTPOutput out,
+ Locale locale, Map<String, String> paramMap) throws ManifoldCFException {
+ Messages.outputResourceWithVelocity(out, locale, resName, paramMap, true);
+ }
+
+ /**
+ *
+ * Output the configuration header section. This method is called in the
+ * head section of the connector's configuration page. Its purpose is to add
+ * the required tabs to the list, and to output any javascript methods that
+ * might be needed by the configuration editing HTML.
+ *
+ * @param threadContext is the local thread context.
+ * @param out is the output to which any HTML should be sent.
+ * @param parameters are the configuration parameters, as they currently
+ * exist, for this connection being configured.
+ * @param tabsArray is an array of tab names. Add to this array any tab
+ * names that are specific to the connector.
+ */
+ @Override
+ public void outputConfigurationHeader(IThreadContext threadContext,
+ IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray)
+ throws ManifoldCFException, IOException {
+ // Add the Server tab
+ tabsArray.add(Messages.getString(locale, DROPBOX_SERVER_TAB_PROPERTY));
+ // Map the parameters
+ Map<String, String> paramMap = new HashMap<String, String>();
+
+ // Fill in the parameters from each tab
+ fillInServerConfigurationMap(paramMap, parameters);
+
+ // Output the Javascript - only one Velocity template for all tabs
+ outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, paramMap);
+ }
+
+ @Override
+ public void outputConfigurationBody(IThreadContext threadContext,
+ IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName)
+ throws ManifoldCFException, IOException {
+
+ // Call the Velocity templates for each tab
+
+ // Server tab
+ Map<String, String> paramMap = new HashMap<String, String>();
+ // Set the tab name
+ paramMap.put("TabName", tabName);
+ // Fill in the parameters
+ fillInServerConfigurationMap(paramMap, parameters);
+ outputResource(EDIT_CONFIG_FORWARD_SERVER, out, locale, paramMap);
+
+ }
+
+ /**
+ * Process a configuration post. This method is called at the start of the
+ * connector's configuration page, whenever there is a possibility that form
+ * data for a connection has been posted. Its purpose is to gather form
+ * information and modify the configuration parameters accordingly. The name
+ * of the posted form is "editconnection".
+ *
+ * @param threadContext is the local thread context.
+ * @param variableContext is the set of variables available from the post,
+ * including binary file post information.
+ * @param parameters are the configuration parameters, as they currently
+ * exist, for this connection being configured.
+ * @return null if all is well, or a string error message if there is an
+ * error that should prevent saving of the connection (and cause a
+ * redirection to an error page).
+ *
+ */
+ @Override
+ public String processConfigurationPost(IThreadContext threadContext,
+ IPostParameters variableContext, ConfigParams parameters)
+ throws ManifoldCFException {
- protected class CheckConnectionThread extends Thread {
-
- protected Throwable exception = null;
-
- public CheckConnectionThread() {
- super();
- setDaemon(true);
- }
-
- public void run() {
- try {
- session.getRepositoryInfo();
- } catch (Throwable e) {
- Logging.connectors.warn("DROPBOX: Error checking repository: " + e.getMessage(), e);
- this.exception = e;
- }
- }
-
- public Throwable getException() {
- return exception;
- }
+
+ String app_key = variableContext.getParameter(DropboxConfig.APP_KEY_PARAM);
+ if (app_key != null) {
+ parameters.setParameter(DropboxConfig.APP_KEY_PARAM, app_key);
+ }
+
+ String app_secret = variableContext.getParameter(DropboxConfig.APP_SECRET_PARAM);
+ if (app_secret != null) {
+ parameters.setParameter(DropboxConfig.APP_SECRET_PARAM, app_secret);
+ }
+
+ String key = variableContext.getParameter(DropboxConfig.KEY_PARAM);
+ if (key != null) {
+ parameters.setParameter(DropboxConfig.KEY_PARAM, key);
+ }
+
+ String secret = variableContext.getParameter(DropboxConfig.SECRET_PARAM);
+ if (secret != null) {
+ parameters.setParameter(DropboxConfig.SECRET_PARAM, secret);
+ }
+
+ String path = variableContext.getParameter(DropboxConfig.PATH_PARAM);
+ if (path != null) {
+ parameters.setParameter(DropboxConfig.PATH_PARAM, path);
+ }
+
+ return null;
+ }
+
+ /**
+ * Fill in specification Velocity parameter map for DROPBOXQuery tab.
+ */
+ private static void fillInDROPBOXQuerySpecificationMap(Map<String, String> newMap, DocumentSpecification ds) {
+ int i = 0;
+ String DropboxQuery = "";
+ while (i < ds.getChildCount()) {
+ SpecificationNode sn = ds.getChild(i);
+ if (sn.getType().equals(JOB_STARTPOINT_NODE_TYPE)) {
+ DropboxQuery = sn.getAttributeValue(DropboxConfig.DROPBOX_QUERY_PARAM);
+ }
+ i++;
+ }
+ newMap.put(DropboxConfig.DROPBOX_QUERY_PARAM, DropboxQuery);
+ }
+
+ /**
+ * View specification. This method is called in the body section of a job's
+ * view page. Its purpose is to present the document specification
+ * information to the user. The coder can presume that the HTML that is
+ * output from this configuration will be within appropriate <html> and
+ * <body> tags.
+ *
+ * @param out is the output to which any HTML should be sent.
+ * @param ds is the current document specification for this job.
+ */
+ @Override
+ public void viewSpecification(IHTTPOutput out, Locale locale, DocumentSpecification ds)
+ throws ManifoldCFException, IOException {
+
+ Map<String, String> paramMap = new HashMap<String, String>();
+
+ // Fill in the map with data from all tabs
+ fillInDROPBOXQuerySpecificationMap(paramMap, ds);
+
+ outputResource(VIEW_SPEC_FORWARD, out, locale, paramMap);
+ }
+
+ /**
+ * Process a specification post. This method is called at the start of job's
+ * edit or view page, whenever there is a possibility that form data for a
+ * connection has been posted. Its purpose is to gather form information and
+ * modify the document specification accordingly. The name of the posted
+ * form is "editjob".
+ *
+ * @param variableContext contains the post data, including binary
+ * file-upload information.
+ * @param ds is the current document specification for this job.
+ * @return null if all is well, or a string error message if there is an
+ * error that should prevent saving of the job (and cause a redirection to
+ * an error page).
+ */
+ @Override
+ public String processSpecificationPost(IPostParameters variableContext,
+ DocumentSpecification ds) throws ManifoldCFException {
+ String cmisQuery = variableContext.getParameter(DropboxConfig.DROPBOX_QUERY_PARAM);
+ if (cmisQuery != null) {
+ int i = 0;
+ while (i < ds.getChildCount()) {
+ SpecificationNode oldNode = ds.getChild(i);
+ if (oldNode.getType().equals(JOB_STARTPOINT_NODE_TYPE)) {
+ ds.removeChild(i);
+ break;
+ }
+ i++;
+ }
+ SpecificationNode node = new SpecificationNode(JOB_STARTPOINT_NODE_TYPE);
+ node.setAttribute(DropboxConfig.DROPBOX_QUERY_PARAM, cmisQuery);
+ variableContext.setParameter(DropboxConfig.DROPBOX_QUERY_PARAM, cmisQuery);
+ ds.addChild(ds.getChildCount(), node);
+ }
+ return null;
+ }
+
+ /**
+ * Output the specification body section. This method is called in the body
+ * section of a job page which has selected a repository connection of the
+ * current type. Its purpose is to present the required form elements for
+ * editing. The coder can presume that the HTML that is output from this
+ * configuration will be within appropriate <html>, <body>, and <form> tags.
+ * The name of the form is "editjob".
+ *
+ * @param out is the output to which any HTML should be sent.
+ * @param ds is the current document specification for this job.
+ * @param tabName is the current tab name.
+ */
+ @Override
+ public void outputSpecificationBody(IHTTPOutput out,
+ Locale locale, DocumentSpecification ds, String tabName) throws ManifoldCFException,
+ IOException {
+
+ // Output DROPBOXQuery tab
+ Map<String, String> paramMap = new HashMap<String, String>();
+ paramMap.put("TabName", tabName);
+ fillInDROPBOXQuerySpecificationMap(paramMap, ds);
+ outputResource(EDIT_SPEC_FORWARD_DROPBOXQUERY, out, locale, paramMap);
+ }
+
+ /**
+ * Output the specification header section. This method is called in the
+ * head section of a job page which has selected a repository connection of
+ * the current type. Its purpose is to add the required tabs to the list,
+ * and to output any javascript methods that might be needed by the job
+ * editing HTML.
+ *
+ * @param out is the output to which any HTML should be sent.
+ * @param ds is the current document specification for this job.
+ * @param tabsArray is an array of tab names. Add to this array any tab
+ * names that are specific to the connector.
+ */
+ @Override
+ public void outputSpecificationHeader(IHTTPOutput out,
+ Locale locale, DocumentSpecification ds, List<String> tabsArray)
+ throws ManifoldCFException, IOException {
+ tabsArray.add(Messages.getString(locale, DROPBOX_QUERY_TAB_PROPERTY));
+
+ Map<String, String> paramMap = new HashMap<String, String>();
+
+ // Fill in the specification header map, using data from all tabs.
+ fillInDROPBOXQuerySpecificationMap(paramMap, ds);
+
+ outputResource(EDIT_SPEC_HEADER_FORWARD, out, locale, paramMap);
+ }
+
+ /**
+ * Queue "seed" documents. Seed documents are the starting places for
+ * crawling activity. Documents are seeded when this method calls
+ * appropriate methods in the passed in ISeedingActivity object.
+ *
+ * This method can choose to find repository changes that happen only during
+ * the specified time interval. The seeds recorded by this method will be
+ * viewed by the framework based on what the getConnectorModel() method
+ * returns.
+ *
+ * It is not a big problem if the connector chooses to create more seeds
+ * than are strictly necessary; it is merely a question of overall work
+ * required.
+ *
+ * The times passed to this method may be interpreted for greatest
+ * efficiency. The time ranges any given job uses with this connector will
+ * not overlap, but will proceed starting at 0 and going to the "current
+ * time", each time the job is run. For continuous crawling jobs, this
+ * method will be called once, when the job starts, and at various periodic
+ * intervals as the job executes.
+ *
+ * When a job's specification is changed, the framework automatically resets
+ * the seeding start time to 0. The seeding start time may also be set to 0
+ * on each job run, depending on the connector model returned by
+ * getConnectorModel().
+ *
+ * Note that it is always ok to send MORE documents rather than less to this
+ * method.
+ *
+ * @param activities is the interface this method should use to perform
+ * whatever framework actions are desired.
+ * @param spec is a document specification (that comes from the job).
+ * @param startTime is the beginning of the time range to consider,
+ * inclusive.
+ * @param endTime is the end of the time range to consider, exclusive.
+ * @param jobMode is an integer describing how the job is being run, whether
+ * continuous or once-only.
+ */
+ @Override
+ public void addSeedDocuments(ISeedingActivity activities,
+   DocumentSpecification spec, long startTime, long endTime, int jobMode)
+   throws ManifoldCFException, ServiceInterruption {
+   // The specification, time window and job mode are not consulted here:
+   // every identifier returned by getSeeds() is handed to the framework.
+   for (String seedIdentifier : getSeeds()) {
+     activities.addSeedDocument(seedIdentifier);
+   }
+ }
+
+ /**
+  * Fetch the seed identifiers from the Dropbox session on a background thread.
+  *
+  * The remote call is made by a GetSeedsThread while this thread blocks in
+  * join(), so the calling thread can still be interrupted while the request
+  * is outstanding.
+  *
+  * @return the value produced by the background thread. This is null when the
+  * call failed and handleDropboxException() returned normally (its behavior
+  * is not visible here -- NOTE(review): confirm it always throws).
+  * @throws ManifoldCFException with type INTERRUPTED if the wait is interrupted.
+  * @throws ServiceInterruption declared for getSession()/handleDropboxException().
+  */
+ protected HashSet<String> getSeeds()
+   throws ManifoldCFException, ServiceInterruption {
+   getSession();
+   GetSeedsThread t = new GetSeedsThread();
+   try {
+     t.start();
+     t.join();
+     Throwable thr = t.getException();
+     if (thr != null) {
+       if (thr instanceof DropboxException) {
+         throw (DropboxException) thr;
+       } else if (thr instanceof RuntimeException) {
+         throw (RuntimeException) thr;
+       } else if (thr instanceof Error) {
+         throw (Error) thr;
+       } else {
+         // Any other checked exception: wrap it instead of blindly casting to
+         // Error, which would replace the real failure with a ClassCastException.
+         throw new RuntimeException("Unhandled exception of type: " + thr.getClass().getName(), thr);
+       }
+     }
+   } catch (InterruptedException e) {
+     t.interrupt();
+     throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+       ManifoldCFException.INTERRUPTED);
+   } catch (DropboxException e) {
+     Logging.connectors.error("DROPBOX: Error adding seed documents: " + e.getMessage(), e);
+     handleDropboxException(e);
+   }
+   return t.getResponse();
+ }
+
+ protected class GetSeedsThread extends Thread {
+
+ protected Throwable exception = null;
+ protected HashSet<String> response = null;
+
+ public GetSeedsThread() {
+ super();
+ setDaemon(true);
+ }
+
+ public void run() {
+ try {
+ response = session.getSeeds();
+ } catch (Throwable e) {
+ this.exception = e;
+ }
}
- protected void checkConnection() throws ManifoldCFException,
- ServiceInterruption {
- while (true) {
- boolean noSession = (session == null);
- getSession();
- long currentTime;
- CheckConnectionThread t = new CheckConnectionThread();
- try {
- t.start();
- t.join();
- Throwable thr = t.getException();
- if (thr != null) {
- if (thr instanceof RemoteException) {
- throw (RemoteException) thr;
- } else {
- throw (Error) thr;
- }
- }
- return;
- } catch (InterruptedException e) {
- t.interrupt();
- throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- } catch (RemoteException e) {
- Throwable e2 = e.getCause();
- if (e2 instanceof InterruptedException
- || e2 instanceof InterruptedIOException) {
- throw new ManifoldCFException(e2.getMessage(), e2,
- ManifoldCFException.INTERRUPTED);
- }
- if (noSession) {
- currentTime = System.currentTimeMillis();
- throw new ServiceInterruption(
- "Transient error connecting to filenet service: "
- + e.getMessage(), currentTime + 60000L);
- }
- session = null;
- lastSessionFetch = -1L;
- continue;
- }
- }
+ public HashSet<String> getResponse() {
+ return response;
}
+
+ public Throwable getException() {
+ return exception;
+ }
+ }
- /**
- * Set up a session
- */
- protected void getSession() throws ManifoldCFException, ServiceInterruption {
- if (session == null) {
- // Check for parameter validity
-
- if (StringUtils.isEmpty(app_key)) {
- throw new ManifoldCFException("Parameter " + DropboxConfig.APP_KEY_PARAM
- + " required but not set");
- }
-
- if (StringUtils.isEmpty(app_secret)) {
- throw new ManifoldCFException("Parameter " + DropboxConfig.APP_SECRET_PARAM
- + " required but not set");
- }
-
-
- if (StringUtils.isEmpty(key)) {
- throw new ManifoldCFException("Parameter " + DropboxConfig.KEY_PARAM
- + " required but not set");
- }
+ /**
+ * Process a set of documents. This is the method that should cause each
+ * document to be fetched, processed, and the results either added to the
+ * queue of documents for the current job, and/or entered into the
+ * incremental ingestion manager. The document specification allows this
+ * class to filter what is done based on the job.
+ *
+ * @param documentIdentifiers is the set of document identifiers to process.
+ * @param versions is the corresponding document versions to process, as
+ * returned by getDocumentVersions() above. The implementation may choose to
+ * ignore this parameter and always process the current version.
+ * @param activities is the interface this method should use to queue up new
+ * document references and ingest documents.
+ * @param spec is the document specification.
+ * @param scanOnly is an array corresponding to the document identifiers. It
+ * is set to true to indicate when the processing should only find other
+ * references, and should not actually call the ingestion methods.
+ * @param jobMode is an integer describing how the job is being run, whether
+ * continuous or once-only.
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ public void processDocuments(String[] documentIdentifiers, String[] versions,
+ IProcessActivity activities, DocumentSpecification spec,
+ boolean[] scanOnly) throws ManifoldCFException, ServiceInterruption {
+
+ Logging.connectors.debug("DROPBOX: Inside processDocuments");
+
+ for (int i = 0; i < documentIdentifiers.length; i++) {
+ long startTime = System.currentTimeMillis();
+ String nodeId = documentIdentifiers[i];
+ if (Logging.connectors.isDebugEnabled()) {
+ Logging.connectors.debug("DROPBOX: Processing document identifier '"
+ + nodeId + "'");
+ }
+
+ DropboxAPI.Entry dropboxObject = getObject(nodeId);
+
+ if(dropboxObject.isDeleted){
+ continue;
+ }
+ String errorCode = "OK";
+ String errorDesc = StringUtils.EMPTY;
+
+
+ if (dropboxObject.isDir) {
+
+ // adding all the children for a folder
+
+ List<DropboxAPI.Entry> children = dropboxObject.contents;
+ for (DropboxAPI.Entry child : children) {
+ activities.addDocumentReference(child.path, nodeId, RELATIONSHIP_CHILD);
+ }
- if (Logging.connectors.isDebugEnabled()) {
- Logging.connectors.debug("DROPBOX: Username = '" + key + "'");
- }
+ } else { // its a file
- if (StringUtils.isEmpty(secret)) {
- throw new ManifoldCFException("Parameter " + DropboxConfig.SECRET_PARAM
- + " required but not set");
- }
+ // content ingestion
+ // MHL to use stream thingy
- Logging.connectors.debug("DROPBOX: Password exists");
+ long fileLength = dropboxObject.bytes;
+ InputStream is = null;
- if (StringUtils.isEmpty(path)) {
- throw new ManifoldCFException("Parameter " + DropboxConfig.PATH_PARAM
- + " required but not set");
- }
+ try {
+ RepositoryDocument rd = new RepositoryDocument();
- long currentTime;
- GetSessionThread t = new GetSessionThread();
- try {
- t.start();
- t.join();
- Throwable thr = t.getException();
- if (thr != null) {
- if (thr instanceof java.net.MalformedURLException) {
- throw (java.net.MalformedURLException) thr;
- } else if (thr instanceof NotBoundException) {
- throw (NotBoundException) thr;
- } else if (thr instanceof RemoteException) {
- throw (RemoteException) thr;
- } else {
- throw (Error) thr;
- }
-
- }
- } catch (InterruptedException e) {
- t.interrupt();
- throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- } catch (java.net.MalformedURLException e) {
- throw new ManifoldCFException(e.getMessage(), e);
- } catch (NotBoundException e) {
- // Transient problem: Server not available at the moment.
- Logging.connectors.warn(
- "DROPBOX: Server not up at the moment: " + e.getMessage(), e);
- currentTime = System.currentTimeMillis();
- throw new ServiceInterruption(e.getMessage(), currentTime + 60000L);
- } catch (RemoteException e) {
- Throwable e2 = e.getCause();
- if (e2 instanceof InterruptedException
- || e2 instanceof InterruptedIOException) {
- throw new ManifoldCFException(e2.getMessage(), e2,
- ManifoldCFException.INTERRUPTED);
- }
- // Treat this as a transient problem
- Logging.connectors.warn(
- "DROPBOX: Transient remote exception creating session: "
- + e.getMessage(), e);
- currentTime = System.currentTimeMillis();
- throw new ServiceInterruption(e.getMessage(), currentTime + 60000L);
+ //binary
+ if (fileLength > 0) {
+ is = getInputStream(nodeId);
+ rd.setBinary(is, fileLength);
+ }
+
+ rd.addField("Modified", dropboxObject.modified);
+ rd.addField("Size", dropboxObject.size);
+ rd.addField("Path", dropboxObject.path);
+ rd.addField("Root", dropboxObject.root);
+ rd.addField("ClientMtime", dropboxObject.clientMtime);
+ rd.addField("mimeType", dropboxObject.mimeType);
+ rd.addField("rev", dropboxObject.rev);
+
+ //ingestion
+ String version = dropboxObject.rev;
+ if (StringUtils.isEmpty(version)) {
+ version = StringUtils.EMPTY;
+ }
+
+ //documentURI
+ String documentURI = dropboxObject.path;
+ activities.ingestDocument(nodeId, version, documentURI, rd);
+
+ } finally {
+ try {
+ if (is != null) {
+ is.close();
}
-
- }
- lastSessionFetch = System.currentTimeMillis();
+ } catch (InterruptedIOException e) {
+ errorCode = "Interrupted error";
+ errorDesc = e.getMessage();
+ throw new ManifoldCFException(e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ } catch (IOException e) {
+ errorCode = "IO ERROR";
+ errorDesc = e.getMessage();
+ Logging.connectors.warn(
+ "DROPBOX: IOException closing file input stream: "
+ + e.getMessage(), e);
+ }
+
+ activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+ fileLength, nodeId, errorCode, errorDesc, null);
+ }
+ }
+ }
+ }
+
+ /**
+  * Start a background transfer for the given document and hand back the
+  * stream it feeds.
+  *
+  * @param nodeId is the document identifier whose content should be streamed.
+  * @return the stream supplied by the background thread, or null when an
+  * error handler below returns normally instead of throwing.
+  */
+ protected InputStream getInputStream(String nodeId)
+   throws ManifoldCFException, ServiceInterruption {
+   getSession();
+   BackgroundStreamThread streamThread = new BackgroundStreamThread(nodeId);
+   try {
+     streamThread.start();
+     return streamThread.getSafeInputStream();
+   } catch (InterruptedException e) {
+     streamThread.interrupt();
+     throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+       ManifoldCFException.INTERRUPTED);
+   } catch (IOException e) {
+     handleIOException(e);
+   } catch (DropboxException e) {
+     Logging.connectors.error("DROPBOX: Error getting stream: " + e.getMessage(), e);
+     handleDropboxException(e);
+   }
+   // Only reached when one of the handlers above returned without throwing.
+   return null;
+ }
+
+ /**
+  * Look up the Dropbox entry for a document identifier on a background thread.
+  *
+  * The remote call is made by a GetObjectThread while this thread blocks in
+  * join(), so the calling thread can still be interrupted while the request
+  * is outstanding.
+  *
+  * @param nodeId is the document identifier to look up.
+  * @return the entry produced by the background thread. This is null when the
+  * call failed and handleDropboxException() returned normally (its behavior
+  * is not visible here -- NOTE(review): confirm it always throws).
+  * @throws ManifoldCFException with type INTERRUPTED if the wait is interrupted.
+  * @throws ServiceInterruption declared for getSession()/handleDropboxException().
+  */
+ protected DropboxAPI.Entry getObject(String nodeId)
+   throws ManifoldCFException, ServiceInterruption {
+   getSession();
+   GetObjectThread t = new GetObjectThread(nodeId);
+   try {
+     t.start();
+     t.join();
+     Throwable thr = t.getException();
+     if (thr != null) {
+       if (thr instanceof DropboxException) {
+         throw (DropboxException) thr;
+       } else if (thr instanceof RuntimeException) {
+         throw (RuntimeException) thr;
+       } else if (thr instanceof Error) {
+         throw (Error) thr;
+       } else {
+         // Any other checked exception: wrap it instead of blindly casting to
+         // Error, which would replace the real failure with a ClassCastException.
+         throw new RuntimeException("Unhandled exception of type: " + thr.getClass().getName(), thr);
+       }
+     }
+   } catch (InterruptedException e) {
+     t.interrupt();
+     throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+       ManifoldCFException.INTERRUPTED);
+   } catch (DropboxException e) {
+     Logging.connectors.error("DROPBOX: Error getting object: " + e.getMessage(), e);
+     handleDropboxException(e);
+   }
+   return t.getResponse();
+ }
+
+ protected class GetObjectThread extends Thread {
+
+ protected final String nodeId;
+ protected Throwable exception = null;
+ protected DropboxAPI.Entry response = null;
+
+ public GetObjectThread(String nodeId) {
+ super();
+ setDaemon(true);
+ this.nodeId = nodeId;
+ }
+
+ public void run() {
+ try {
+ response = session.getObject(nodeId);
+ } catch (Throwable e) {
+ this.exception = e;
+ }
}
- protected class GetSessionThread extends Thread {
-
- protected Throwable exception = null;
-
- public GetSessionThread() {
- super();
- setDaemon(true);
- }
-
- public void run() {
- try {
- // Create a session
- parameters.clear();
-
- // user credentials
- parameters.put(DropboxConfig.APP_KEY_PARAM, app_key);
- parameters.put(DropboxConfig.APP_SECRET_PARAM, app_secret);
-
-
- parameters.put(DropboxConfig.KEY_PARAM, key);
- parameters.put(DropboxConfig.SECRET_PARAM, secret);
-
- try {
- session = new DropboxSession(parameters);
- } catch (Exception e) {
- Logging.connectors.error("DROPBOX: Error during the creation of the new session. Please check the endpoint parameters: " + e.getMessage(), e);
- this.exception = e;
- }
- } catch (Throwable e) {
- this.exception = e;
- }
- }
+ public DropboxAPI.Entry getResponse() {
+ return response;
+ }
+
+ public Throwable getException() {
+ return exception;
+ }
+ }
- public Throwable getException() {
- return exception;
- }
+ protected class BackgroundStreamThread extends Thread
+ {
+ protected final String nodeId;
+
+ protected boolean abortThread = false;
+ protected Throwable responseException = null;
+ protected InputStream sourceStream = null;
+ protected XThreadInputStream threadStream = null;
+
+ public BackgroundStreamThread(String nodeId)
+ {
+ super();
+ setDaemon(true);
+ this.nodeId = nodeId;
}
- @Override
- public void poll() throws ManifoldCFException {
- if (lastSessionFetch == -1L) {
- return;
- }
-
- long currentTime = System.currentTimeMillis();
- if (currentTime >= lastSessionFetch + timeToRelease) {
- DestroySessionThread t = new DestroySessionThread();
- try {
- t.start();
- t.join();
- Throwable thr = t.getException();
- if (thr != null) {
- if (thr instanceof RemoteException) {
- throw (RemoteException) thr;
- } else {
- throw (Error) thr;
- }
- }
- session = null;
- lastSessionFetch = -1L;
- } catch (InterruptedException e) {
- t.interrupt();
- throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- } catch (RemoteException e) {
- Throwable e2 = e.getCause();
- if (e2 instanceof InterruptedException
- || e2 instanceof InterruptedIOException) {
- throw new ManifoldCFException(e2.getMessage(), e2,
- ManifoldCFException.INTERRUPTED);
- }
- session = null;
- lastSessionFetch = -1L;
- // Treat this as a transient problem
- Logging.connectors.warn(
- "DROPBOX: Transient remote exception closing session: "
- + e.getMessage(), e);
+ /**
+  * Open the Dropbox stream for nodeId, publish it through threadStream, and
+  * pump its content until done. Any failure is recorded in responseException.
+  */
+ public void run()
+ {
+   try {
+     try {
+       synchronized (this) {
+         if (!abortThread) {
+           sourceStream = session.getDropboxInputStream(nodeId);
+           threadStream = new XThreadInputStream(sourceStream);
+           // Wake a caller waiting in getSafeInputStream().
+           this.notifyAll();
         }
-
+       }
+
+       if (threadStream != null)
+       {
+         // Stuff the content until we are done
+         threadStream.stuffQueue();
+       }
+     } finally {
+       if (sourceStream != null)
+         sourceStream.close();
+     }
+   } catch (Throwable e) {
+     // Record the failure under the monitor and wake any waiter; without the
+     // notify, a caller blocked in getSafeInputStream() would never learn that
+     // the stream could not be opened.
+     synchronized (this) {
+       responseException = e;
+       this.notifyAll();
+     }
+   }
+ }
+
+ /**
+  * Block until the background thread has created its cross-thread stream, or
+  * has recorded a failure.
+  *
+  * @return the stream fed by this thread.
+  * @throws InterruptedException if the calling thread is interrupted while waiting.
+  * @throws IOException if the background thread failed with an IOException.
+  * @throws DropboxException if the background thread failed with a DropboxException.
+  */
+ public InputStream getSafeInputStream()
+   throws InterruptedException, IOException, DropboxException
+ {
+   // Must wait until stream is created, or until we note an exception was thrown.
+   while (true)
+   {
+     synchronized (this)
+     {
+       // Rethrow the recorded failure as its real type (no-op when null), so
+       // the caller's IOException/DropboxException handlers actually see it.
+       checkException(responseException);
+       if (threadStream != null)
+         return threadStream;
+       // Bounded wait: re-check periodically so that a failure recorded
+       // without a notification cannot leave this caller parked forever.
+       wait(1000L);
       }
+   }
   }
-
- /**
- * Get the maximum number of documents to amalgamate together into one
- * batch, for this connector.
- *
- * @return the maximum number. 0 indicates "unlimited".
- */
- @Override
- public int getMaxDocumentRequest() {
- return 1;
+
+ public void abort()
+ {
+   // Invoked from the consumer's cleanup path, both after a normal
+   // completion of the stream and after a failure.
+   synchronized (this) {
+     final XThreadInputStream activeStream = threadStream;
+     if (activeStream != null) {
+       activeStream.abort();
+     }
+     abortThread = true;
+   }
   }
-
- /**
- * Return the list of relationship types that this connector recognizes.
- *
- * @return the list.
- */
- @Override
- public String[] getRelationshipTypes() {
- return new String[]{RELATIONSHIP_CHILD};
+
+ /** Wait for this background thread to terminate. */
+ public void finishUp()
+   throws InterruptedException
+ {
+   this.join();
   }
+
+ /**
+  * Rethrow a recorded failure using its own type where the signature allows
+  * it; anything else is wrapped in a RuntimeException. Does nothing for null.
+  */
+ protected synchronized void checkException(Throwable exception)
+   throws IOException, DropboxException
+ {
+   if (exception == null)
+     return;
+   if (exception instanceof DropboxException)
+     throw (DropboxException)exception;
+   if (exception instanceof IOException)
+     throw (IOException)exception;
+   if (exception instanceof RuntimeException)
+     throw (RuntimeException)exception;
+   if (exception instanceof Error)
+     throw (Error)exception;
+   throw new RuntimeException("Unhandled exception of type: "+exception.getClass().getName(),exception);
+ }
+
+ }
+
+ /** This input stream wraps a background transaction thread, so that
+ * the thread is ended when the stream is closed.
+ */
+ private static class BackgroundInputStream extends InputStream {
+
+ private BackgroundStreamThread streamThread = null;
+ private InputStream xThreadInputStream = null;
- /**
- * Fill in a Server tab configuration parameter map for calling a Velocity
- * template.
- *
- * @param newMap is the map to fill in
- * @param parameters is the current set of configuration parameters
- */
- private static void fillInServerConfigurationMap(Map<String, String> newMap, ConfigParams parameters) {
-
- String app_key = parameters.getParameter(DropboxConfig.APP_KEY_PARAM);
- String app_secret = parameters.getParameter(DropboxConfig.APP_SECRET_PARAM);
-
- String username = parameters.getParameter(DropboxConfig.KEY_PARAM);
- String password = parameters.getParameter(DropboxConfig.SECRET_PARAM);
- String path = parameters.getParameter(DropboxConfig.PATH_PARAM);
-
- if (app_key == null) {
- app_key = StringUtils.EMPTY;
- }
-
- if (app_secret == null) {
- app_secret = StringUtils.EMPTY;
- }
-
- if (username == null) {
- username = StringUtils.EMPTY;
- }
- if (password == null) {
- password = StringUtils.EMPTY;
- }
- if (path == null) {
- path = DropboxConfig.PATH_DEFAULT_VALUE;
- }
-
- newMap.put(DropboxConfig.APP_KEY_PARAM, app_key);
- newMap.put(DropboxConfig.APP_SECRET_PARAM, app_secret);
- newMap.put(DropboxConfig.KEY_PARAM, username);
- newMap.put(DropboxConfig.SECRET_PARAM, password);
- newMap.put(DropboxConfig.PATH_PARAM, path);
- }
-
- /**
- * View configuration. This method is called in the body section of the
- * connector's view configuration page. Its purpose is to present the
- * connection information to the user. The coder can presume that the HTML
- * that is output from this configuration will be within appropriate <html>
- * and <body> tags.
- *
- * @param threadContext is the local thread context.
- * @param out is the output to which any HTML should be sent.
- * @param parameters are the configuration parameters, as they currently
- * exist, for this connection being configured.
- */
- @Override
- public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
- Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
- Map<String, String> paramMap = new HashMap<String, String>();
-
- // Fill in map from each tab
- fillInServerConfigurationMap(paramMap, parameters);
-
- outputResource(VIEW_CONFIG_FORWARD, out, locale, paramMap);
- }
-
- /**
- * Read the content of a resource, replace the variable ${PARAMNAME} with
- * the value and copy it to the out.
- *
- * @param resName
- * @param out
- * @throws ManifoldCFException
- */
- private static void outputResource(String resName, IHTTPOutput out,
- Locale locale, Map<String, String> paramMap) throws ManifoldCFException {
- Messages.outputResourceWithVelocity(out, locale, resName, paramMap, true);
- }
-
- /**
- *
- * Output the configuration header section. This method is called in the
- * head section of the connector's configuration page. Its purpose is to add
- * the required tabs to the list, and to output any javascript methods that
- * might be needed by the configuration editing HTML.
- *
- * @param threadContext is the local thread context.
- * @param out is the output to which any HTML should be sent.
- * @param parameters are the configuration parameters, as they currently
- * exist, for this connection being configured.
- * @param tabsArray is an array of tab names. Add to this array any tab
- * names that are specific to the connector.
- */
- @Override
- public void outputConfigurationHeader(IThreadContext threadContext,
- IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray)
- throws ManifoldCFException, IOException {
- // Add the Server tab
- tabsArray.add(Messages.getString(locale, DROPBOX_SERVER_TAB_PROPERTY));
- // Map the parameters
- Map<String, String> paramMap = new HashMap<String, String>();
-
- // Fill in the parameters from each tab
- fillInServerConfigurationMap(paramMap, parameters);
-
- // Output the Javascript - only one Velocity template for all tabs
- outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, paramMap);
+ /** Pair the background thread with the stream it feeds. */
+ public BackgroundInputStream(BackgroundStreamThread streamThread, InputStream xThreadInputStream)
+ {
+   this.xThreadInputStream = xThreadInputStream;
+   this.streamThread = streamThread;
   }
-
+
@Override
- public void outputConfigurationBody(IThreadContext threadContext,
- IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName)
- throws ManifoldCFException, IOException {
-
- // Call the Velocity templates for each tab
-
- // Server tab
- Map<String, String> paramMap = new HashMap<String, String>();
- // Set the tab name
- paramMap.put("TabName", tabName);
- // Fill in the parameters
- fillInServerConfigurationMap(paramMap, parameters);
- outputResource(EDIT_CONFIG_FORWARD_SERVER, out, locale, paramMap);
-
+ public int available()
+   throws IOException
+ {
+   // Delegate to the wrapped stream while it is still attached.
+   return (xThreadInputStream != null) ? xThreadInputStream.available() : super.available();
   }
-
- /**
- * Process a configuration post. This method is called at the start of the
- * connector's configuration page, whenever there is a possibility that form
- * data for a connection has been posted. Its purpose is to gather form
- * information and modify the configuration parameters accordingly. The name
- * of the posted form is "editconnection".
- *
- * @param threadContext is the local thread context.
- * @param variableContext is the set of variables available from the post,
- * including binary file post information.
- * @param parameters are the configuration parameters, as they currently
- * exist, for this connection being configured.
- * @return null if all is well, or a string error message if there is an
- * error that should prevent saving of the connection (and cause a
- * redirection to an error page).
- *
- */
+
@Override
- public String processConfigurationPost(IThreadContext threadContext,
- IPostParameters variableContext, ConfigParams parameters)
- throws ManifoldCFException {
-
-
- String app_key = variableContext.getParameter(DropboxConfig.APP_KEY_PARAM);
- if (app_key != null) {
- parameters.setParameter(DropboxConfig.APP_KEY_PARAM, app_key);
- }
-
- String app_secret = variableContext.getParameter(DropboxConfig.APP_SECRET_PARAM);
- if (app_secret != null) {
- parameters.setParameter(DropboxConfig.APP_SECRET_PARAM, app_secret);
- }
-
- String key = variableContext.getParameter(DropboxConfig.KEY_PARAM);
- if (key != null) {
- parameters.setParameter(DropboxConfig.KEY_PARAM, key);
- }
-
- String secret = variableContext.getParameter(DropboxConfig.SECRET_PARAM);
- if (secret != null) {
- parameters.setParameter(DropboxConfig.SECRET_PARAM, secret);
+ public void close()
+ throws IOException
+ {
+ try
+ {
+ if (xThreadInputStream != null)
+ {
+ xThreadInputStream.close();
+ xThreadInputStream = null;
+ }
+ }
+ finally
+ {
+ if (streamThread != null)
+ {
+ streamThread.abort();
+ try
+ {
+ streamThread.finishUp();
+ }
+ catch (InterruptedException e)
+ {
+ throw new InterruptedIOException(e.getMessage());
+ }
+ streamThread = null;
}
-
- String path = variableContext.getParameter(DropboxConfig.PATH_PARAM);
- if (path != null) {
- parameters.setParameter(DropboxConfig.PATH_PARAM, path);
- }
-
- return null;
- }
-
- /**
- * Fill in specification Velocity parameter map for DROPBOXQuery tab.
- */
- private static void fillInDROPBOXQuerySpecificationMap(Map<String, String> newMap, DocumentSpecification ds) {
- int i = 0;
- String DropboxQuery = "";
- while (i < ds.getChildCount()) {
- SpecificationNode sn = ds.getChild(i);
- if (sn.getType().equals(JOB_STARTPOINT_NODE_TYPE)) {
- DropboxQuery = sn.getAttributeValue(DropboxConfig.DROPBOX_QUERY_PARAM);
- }
- i++;
- }
- newMap.put(DropboxConfig.DROPBOX_QUERY_PARAM, DropboxQuery);
+ }
}
-
- /**
- * View specification. This method is called in the body section of a job's
- * view page. Its purpose is to present the document specification
- * information to the user. The coder can presume that the HTML that is
- * output from this configuration will be within appropriate <html> and
- * <body> tags.
- *
- * @param out is the output to which any HTML should be sent.
- * @param ds is the current document specification for this job.
- */
+
@Override
- public void viewSpecification(IHTTPOutput out, Locale locale, DocumentSpecification ds)
- throws ManifoldCFException, IOException {
-
- Map<String, String> paramMap = new HashMap<String, String>();
-
- // Fill in the map with data from all tabs
- fillInDROPBOXQuerySpecificationMap(paramMap, ds);
-
- outputResource(VIEW_SPEC_FORWARD, out, locale, paramMap);
- }
-
- /**
- * Process a specification post. This method is called at the start of job's
- * edit or view page, whenever there is a possibility that form data for a
- * connection has been posted. Its purpose is to gather form information and
- * modify the document specification accordingly. The name of the posted
- * form is "editjob".
- *
- * @param variableContext contains the post data, including binary
- * file-upload information.
- * @param ds is the current document specification for this job.
- * @return null if all is well, or a string error message if there is an
- * error that should prevent saving of the job (and cause a redirection to
- * an error page).
- */
+ public void mark(int readlimit)
+ {
+   // Fall back to the InputStream default once the wrapped stream is detached.
+   if (xThreadInputStream == null) {
+     super.mark(readlimit);
+     return;
+   }
+   xThreadInputStream.mark(readlimit);
+ }
+
@Override
- public String processSpecificationPost(IPostParameters variableContext,
- DocumentSpecification ds) throws ManifoldCFException {
- String cmisQuery = variableContext.getParameter(DropboxConfig.DROPBOX_QUERY_PARAM);
- if (cmisQuery != null) {
- int i = 0;
- while (i < ds.getChildCount()) {
- SpecificationNode oldNode = ds.getChild(i);
- if (oldNode.getType().equals(JOB_STARTPOINT_NODE_TYPE)) {
- ds.removeChild(i);
- break;
- }
- i++;
- }
- SpecificationNode node = new SpecificationNode(JOB_STARTPOINT_NODE_TYPE);
- node.setAttribute(DropboxConfig.DROPBOX_QUERY_PARAM, cmisQuery);
- variableContext.setParameter(DropboxConfig.DROPBOX_QUERY_PARAM, cmisQuery);
- ds.addChild(ds.getChildCount(), node);
- }
- return null;
- }
-
- /**
- * Output the specification body section. This method is called in the body
- * section of a job page which has selected a repository connection of the
- * current type. Its purpose is to present the required form elements for
- * editing. The coder can presume that the HTML that is output from this
- * configuration will be within appropriate <html>, <body>, and <form> tags.
- * The name of the form is "editjob".
- *
- * @param out is the output to which any HTML should be sent.
- * @param ds is the current document specification for this job.
- * @param tabName is the current tab name.
- */
+ public void reset()
+   throws IOException
+ {
+   // Fall back to the InputStream default once the wrapped stream is detached.
+   if (xThreadInputStream == null) {
+     super.reset();
+     return;
+   }
+   xThreadInputStream.reset();
+ }
+
@Override
- public void outputSpecificationBody(IHTTPOutput out,
- Locale locale, DocumentSpecification ds, String tabName) throws ManifoldCFException,
- IOException {
-
- // Output DROPBOXQuery tab
- Map<String, String> paramMap = new HashMap<String, String>();
- paramMap.put("TabName", tabName);
- fillInDROPBOXQuerySpecificationMap(paramMap, ds);
- outputResource(EDIT_SPEC_FORWARD_DROPBOXQUERY, out, locale, paramMap);
- }
-
- /**
- * Output the specification header section. This method is called in the
- * head section of a job page which has selected a repository connection of
- * the current type. Its purpose is to add the required tabs to the list,
- * and to output any javascript methods that might be needed by the job
- * editing HTML.
- *
- * @param out is the output to which any HTML should be sent.
- * @param ds is the current document specification for this job.
- * @param tabsArray is an array of tab names. Add to this array any tab
- * names that are specific to the connector.
- */
+ public boolean markSupported()
+ {
+   // Delegate to the wrapped stream while it is still attached.
+   return (xThreadInputStream != null) ? xThreadInputStream.markSupported() : super.markSupported();
+ }
+
@Override
- public void outputSpecificationHeader(IHTTPOutput out,
- Locale locale, DocumentSpecification ds, List<String> tabsArray)
- throws ManifoldCFException, IOException {
- tabsArray.add(Messages.getString(locale, DROPBOX_QUERY_TAB_PROPERTY));
-
- Map<String, String> paramMap = new HashMap<String, String>();
-
- // Fill in the specification header map, using data from all tabs.
- fillInDROPBOXQuerySpecificationMap(paramMap, ds);
-
- outputResource(EDIT_SPEC_HEADER_FORWARD, out, locale, paramMap);
- }
-
- /**
- * Queue "seed" documents. Seed documents are the starting places for
- * crawling activity. Documents are seeded when this method calls
- * appropriate methods in the passed in ISeedingActivity object.
- *
- * This method can choose to find repository changes that happen only during
- * the specified time interval. The seeds recorded by this method will be
- * viewed by the framework based on what the getConnectorModel() method
- * returns.
- *
- * It is not a big problem if the connector chooses to create more seeds
- * than are strictly necessary; it is merely a question of overall work
- * required.
- *
- * The times passed to this method may be interpreted for greatest
- * efficiency. The time ranges any given job uses with this connector will
- * not overlap, but will proceed starting at 0 and going to the "current
- * time", each time the job is run. For continuous crawling jobs, this
- * method will be called once, when the job starts, and at various periodic
- * intervals as the job executes.
- *
- * When a job's specification is changed, the framework automatically resets
- * the seeding start time to 0. The seeding start time may also be set to 0
- * on each job run, depending on the connector model returned by
- * getConnectorModel().
- *
- * Note that it is always ok to send MORE documents rather than less to this
- * method.
- *
- * @param activities is the interface this method should use to perform
- * whatever framework actions are desired.
- * @param spec is a document specification (that comes from the job).
- * @param startTime is the beginning of the time range to consider,
- * inclusive.
- * @param endTime is the end of the time range to consider, exclusive.
- * @param jobMode is an integer describing how the job is being run, whether
- * continuous or once-only.
- */
+ public long skip(long n)
+   throws IOException
+ {
+   // Delegate to the wrapped stream while it is still attached.
+   return (xThreadInputStream != null) ? xThreadInputStream.skip(n) : super.skip(n);
+ }
+
@Override
- public void addSeedDocuments(ISeedingActivity activities,
- DocumentSpecification spec, long startTime, long endTime, int jobMode)
- throws ManifoldCFException, ServiceInterruption {
-
- try {
- getSession();
- /*String cmisQuery = StringUtils.EMPTY;
- int i = 0;
- while (i < spec.getChildCount()) {
- SpecificationNode sn = spec.getChild(i);
- if (sn.getType().equals(JOB_STARTPOINT_NODE_TYPE)) {
- cmisQuery = sn.getAttributeValue(CmisConfig.CMIS_QUERY_PARAM);
- break;
- }
- i++;
- }*/
-
- //TODO handle different start directories, and save/retreive the cursor from somewhere..
- HashSet<String> seeds = session.getSeeds();
- for (String seed : seeds) {
- activities.addSeedDocument(seed);
- }
- } catch (DropboxException ex) {
- Logging.connectors.error("DROPBOX: Error adding seed documents: " + ex.getMessage(), ex);
- }
+ public int read(byte[] b, int off, int len)
+   throws IOException
+ {
+   // Delegate to the wrapped stream while it is still attached.
+   return (xThreadInputStream != null) ? xThreadInputStream.read(b,off,len) : super.read(b,off,len);
   }
- /**
- * Process a set of documents. This is the method that should cause each
- * document to be fetched, processed, and the results either added to the
- * queue of documents for the current job, and/or entered into the
- * incremental ingestion manager. The document specification allows this
- * class to filter what is done based on the job.
- *
- * @param documentIdentifiers is the set of document identifiers to process.
- * @param versions is the corresponding document versions to process, as
- * returned by getDocumentVersions() above. The implementation may choose to
- * ignore this parameter and always process the current version.
- * @param activities is the interface this method should use to queue up new
- * document references and ingest documents.
- * @param spec is the document specification.
- * @param scanOnly is an array corresponding to the document identifiers. It
- * is set to true to indicate when the processing should only find other
- * references, and should not actually call the ingestion methods.
- * @param jobMode is an integer describing how the job is being run, whether
- * continuous or once-only.
- */
- @SuppressWarnings("unchecked")
@Override
- public void processDocuments(String[] documentIdentifiers, String[] versions,
- IProcessActivity activities, DocumentSpecification spec,
- boolean[] scanOnly) throws ManifoldCFException, ServiceInterruption {
- getSession();
- Logging.connectors.debug("DROPBOX: Inside processDocuments");
- int i = 0;
-
- while (i < documentIdentifiers.length) {
- try {
- long startTime = System.currentTimeMillis();
- String nodeId = documentIdentifiers[i];
- if (Logging.connectors.isDebugEnabled()) {
- Logging.connectors.debug("DROPBOX: Processing document identifier '"
- + nodeId + "'");
- }
-
- DropboxAPI.Entry dropboxObject = session.getObject(nodeId);
- if(dropboxObject.isDeleted){
- continue;
- }
- String errorCode = "OK";
- String errorDesc = StringUtils.EMPTY;
-
-
- if (dropboxObject.isDir) {
-
- // adding all the children for a folder
-
- List<DropboxAPI.Entry> children = dropboxObject.contents;
- for (DropboxAPI.Entry child : children) {
- activities.addDocumentReference(child.path, nodeId, RELATIONSHIP_CHILD);
- }
-
- } else { // its a file
-
- // content ingestion
-
-
- long fileLength = dropboxObject.bytes;
- InputStream is = null;
-
- try {
- RepositoryDocument rd = new RepositoryDocument();
-
- //binary
- if (fileLength > 0) {
- is = session.getDropboxInputStream(nodeId);
- rd.setBinary(is, fileLength);
- }
-
- rd.addField("Modified", dropboxObject.modified);
- rd.addField("Size", dropboxObject.size);
- rd.addField("Path", dropboxObject.path);
- rd.addField("Root", dropboxObject.root);
- rd.addField("ClientMtime", dropboxObject.clientMtime);
- rd.addField("mimeType", dropboxObject.mimeType);
- rd.addField("rev", dropboxObject.rev);
-
- //ingestion
- String version = dropboxObject.rev;
- if (StringUtils.isEmpty(version)) {
- version = StringUtils.EMPTY;
- }
-
- //documentURI
- String documentURI = dropboxObject.path;
- activities.ingestDocument(nodeId, version, documentURI, rd);
-
- } finally {
- try {
- if (is != null) {
- is.close();
- }
- } catch (InterruptedIOException e) {
- errorCode = "Interrupted error";
- errorDesc = e.getMessage();
- throw new ManifoldCFException(e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- } catch (IOException e) {
- errorCode = "IO ERROR";
- errorDesc = e.getMessage();
- Logging.connectors.warn(
- "DROPBOX: IOException closing file input stream: "
- + e.getMessage(), e);
- }
-
- activities.recordActivity(new Long(startTime), ACTIVITY_READ,
- fileLength, nodeId, errorCode, errorDesc, null);
- }
- }
- i++;
- } catch (DropboxException ex) {
- Logging.connectors.error("DROPBOX: Error processing documents: " + ex.getMessage(), ex);
- }
- }
+ public int read(byte[] b)
+   throws IOException
+ {
+   // With no cross-thread stream attached, defer to the superclass implementation.
+   if (xThreadInputStream == null) {
+     return super.read(b);
+   }
+   // Otherwise hand the whole-buffer read straight to the attached stream.
+   return xThreadInputStream.read(b);
}
-
- /**
- * The short version of getDocumentVersions. Get document versions given an
- * array of document identifiers. This method is called for EVERY document
- * that is considered. It is therefore important to perform as little work
- * as possible here.
- *
- * @param documentIdentifiers is the array of local document identifiers, as
- * understood by this connector.
- * @param spec is the current document specification for the current job. If
- * there is a dependency on this specification, then the version string
- * should include the pertinent data, so that reingestion will occur when
- * the specification changes. This is primarily useful for metadata.
- * @return the corresponding version strings, with null in the places where
- * the document no longer exists. Empty version strings indicate that there
- * is no versioning ability for the corresponding document, and the document
- * will always be processed.
- */
+
@Override
- public String[] getDocumentVersions(String[] documentIdentifiers,
- DocumentSpecification spec) throws ManifoldCFException,
- ServiceInterruption {
- getSession();
- String[] rval = new String[documentIdentifiers.length];
- int i = 0;
- while (i < rval.length) {
- try {
- DropboxAPI.Entry dropboxObject = session.getObject(documentIdentifiers[i]);
- if (!dropboxObject.isDir) {
- //we have to check if this CMIS repository support versioning
- // or if the versioning is disabled for this content
- if (!dropboxObject.isDeleted&&StringUtils.isNotEmpty(dropboxObject.rev)) {
- rval[i] = dropboxObject.rev;
- } else {
- //a CMIS document that doesn't contain versioning information will always be processed
- rval[i] = StringUtils.EMPTY;
- }
- } else {
- //a CMIS folder will always be processed
- rval[i] = StringUtils.EMPTY;
- }
- } catch (DropboxException ex) {
- Logging.connectors.error("DROPBOX: Error getting document versions: " + ex.getMessage(), ex);
- }
- i++;
- }
- return rval;
+ public int read()
+   throws IOException
+ {
+   // No stream attached: report -1 (presumably end-of-stream, as for
+   // java.io.InputStream.read() -- confirm against the enclosing class).
+   if (xThreadInputStream == null) {
+     return -1;
+   }
+   // Otherwise delegate the single-byte read to the attached stream.
+   return xThreadInputStream.read();
+ }
+
+ }
+
+ /**
+ * The short version of getDocumentVersions. Get document versions given an
+ * array of document identifiers. This method is called for EVERY document
+ * that is considered. It is therefore important to perform as little work
+ * as possible here.
+ *
+ * Version strings are assigned as follows:
+ * a deleted entry (file or folder) gets null; a folder gets the empty
+ * string; a file gets its "rev" value, or the empty string when no rev is
+ * available.
+ *
+ * @param documentIdentifiers is the array of local document identifiers, as
+ * understood by this connector.
+ * @param spec is the current document specification for the current job. If
+ * there is a dependency on this specification, then the version string
+ * should include the pertinent data, so that reingestion will occur when
+ * the specification changes. This is primarily useful for metadata.
+ * @return the corresponding version strings, with null in the places where
+ * the document no longer exists. Empty version strings indicate that there
+ * is no versioning ability for the corresponding document, and the document
+ * will always be processed.
+ */
+ @Override
+ public String[] getDocumentVersions(String[] documentIdentifiers,
+   DocumentSpecification spec) throws ManifoldCFException, ServiceInterruption {
+   String[] rval = new String[documentIdentifiers.length];
+   for (int i = 0; i < rval.length; i++) {
+     DropboxAPI.Entry dropboxObject = getObject(documentIdentifiers[i]);
+     if (dropboxObject.isDeleted) {
+       // Check deletion before the folder test: a deleted folder no longer
+       // exists either, so it must be reported as null rather than as an
+       // always-processed empty version.
+       rval[i] = null;
+     } else if (dropboxObject.isDir) {
+       //a folder will always be processed
+       rval[i] = StringUtils.EMPTY;
+     } else if (StringUtils.isNotEmpty(dropboxObject.rev)) {
+       rval[i] = dropboxObject.rev;
+     } else {
+       //a document that doesn't contain versioning information will always be processed
+       rval[i] = StringUtils.EMPTY;
+     }
+   }
+   return rval;
+ }
+
+ /**
+ * Handle a dropbox exception by converting it into a ServiceInterruption.
+ * This method never returns normally.
+ *
+ * @param e is the Dropbox exception to convert; it is passed along as the cause.
+ * @throws ServiceInterruption always, carrying the original exception.
+ */
+ protected static void handleDropboxException(DropboxException e)
+   throws ManifoldCFException, ServiceInterruption {
+   // Right now I don't know enough, so throw Service Interruptions
+   final long now = System.currentTimeMillis();
+   // The two timestamps lie 5 minutes and 3 hours after "now"; presumably the
+   // retry time and the give-up time -- confirm against ServiceInterruption.
+   final long fiveMinutesFromNow = now + 300000L;
+   final long threeHoursFromNow = now + 3 * 60 * 60000L;
+   final String message = "Dropbox exception: "+e.getMessage();
+   throw new ServiceInterruption(message, e, fiveMinutesFromNow, threeHoursFromNow, -1, false);
+ }
+
+ /**
+ * Handle an IO exception by converting it into a ManifoldCF exception.
+ * This method never returns normally.
+ *
+ * @param e is the IO exception to convert; it is passed along as the cause.
+ * @throws ManifoldCFException with type INTERRUPTED when e is an InterruptedIOException.
+ * @throws ServiceInterruption for every other IOException.
+ */
+ protected static void handleIOException(IOException e)
+   throws ManifoldCFException, ServiceInterruption {
+   final boolean interrupted = e instanceof InterruptedIOException;
+   if (interrupted) {
+     // Interrupted I/O is reported as an INTERRUPTED ManifoldCFException
+     // instead of a ServiceInterruption.
+     final String interruptedMessage = "Interrupted: " + e.getMessage();
+     throw new ManifoldCFException(interruptedMessage, e, ManifoldCFException.INTERRUPTED);
+   }
+   final long now = System.currentTimeMillis();
+   // The two timestamps lie 5 minutes and 3 hours after "now"; presumably the
+   // retry time and the give-up time -- confirm against ServiceInterruption.
+   final long fiveMinutesFromNow = now + 300000L;
+   final long threeHoursFromNow = now + 3 * 60 * 60000L;
+   final String message = "IO exception: "+e.getMessage();
+   throw new ServiceInterruption(message, e, fiveMinutesFromNow, threeHoursFromNow, -1, false);
+ }
+
}