You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/05/30 01:23:19 UTC
svn commit: r1598439 - in
/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation:
./ BaseTransformationConnector.java
Author: kwright
Date: Thu May 29 23:23:19 2014
New Revision: 1598439
URL: http://svn.apache.org/r1598439
Log:
Add BaseTransformationConnector abstract base class
Added:
manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/
manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java (with props)
Added: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java?rev=1598439&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java (added)
+++ manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java Thu May 29 23:23:19 2014
@@ -0,0 +1,232 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.transformation;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+
+import java.io.*;
+import java.util.*;
+
+/** This base class describes an instance of a connection between the Connector Framework and
+* a document transformation engine.
+*
+* Each instance of this class is used in only one thread at a time. Connection pooling
+* on these kinds of objects is performed by the factory which instantiates transformation connectors
+* from symbolic names and config parameters, and is pooled by these parameters. That is, a pooled connector
+* handle is used only if all the connection parameters for the handle match.
+*
+* Subclasses of this class should provide a default constructor which has this signature:
+*
+* xxx();
+*
+* Connectors are either configured or not. If configured, they will persist in a pool, and be
+* reused multiple times. Certain methods of a connector may be called before the connector is
+* configured. This includes basically all methods that permit inspection of the connector's
+* capabilities.
+*
+*/
+public abstract class BaseTransformationConnector extends org.apache.manifoldcf.core.connector.BaseConnector implements ITransformationConnector
+{
+ public static final String _rcsid = "@(#)$Id$";
+
+
+ /** Return the list of activities that this connector supports (i.e. writes into the log).
+ *@return the list. This base implementation returns an empty array.
+ */
+ @Override
+ public String[] getActivitiesList()
+ {
+ return new String[0];
+ }
+
+ /** Request arbitrary connector information.
+ * This method is called directly from the API in order to allow API users to perform any one of several connector-specific
+ * queries.
+ *@param output is the response object, to be filled in by this method.
+ *@param command is the command, which is taken directly from the API request.
+ *@return true if the resource is found, false if not. This base implementation always returns false and does not touch output.
+ */
+ @Override
+ public boolean requestInfo(Configuration output, String command)
+ throws ManifoldCFException
+ {
+ return false;
+ }
+
+ /** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
+ * in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param mimeType is the mime type of the document.
+ *@param checkActivity is an object including the activities that can be performed by this method.
+ *@return true if the mime type can be accepted by this connector. This base implementation always returns true.
+ */
+ @Override
+ public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return true;
+ }
+
+ /** Pre-determine whether a document (passed here as a File object) is acceptable or not. This method is
+ * used to determine whether a document needs to be actually transferred. This hook is provided mainly to support
+ * search engines that only handle a small set of accepted file types.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param localFile is the local file to check.
+ *@param checkActivity is an object including the activities that can be done by this method.
+ *@return true if the file is acceptable, false if not. This base implementation always returns true.
+ */
+ @Override
+ public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return true;
+ }
+
+ /** Pre-determine whether a document's length is acceptable. This method is used
+ * to determine whether to fetch a document in the first place.
+ *@param pipelineDescription is the document's pipeline version string, for this connection.
+ *@param length is the length of the document.
+ *@param checkActivity is an object including the activities that can be done by this method.
+ *@return true if the length is acceptable, false if not. This base implementation always returns true.
+ */
+ @Override
+ public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return true;
+ }
+
+ /** Get a pipeline version string, given a pipeline specification object. The version string is used to
+ * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector
+ * Framework to determine whether a document will need to be processed again.
+ * Note that the contents of any document cannot be considered by this method; only configuration and specification information
+ * can be considered.
+ *
+ * This method presumes that the underlying connector object has been configured.
+ *@param spec is the current pipeline specification object for this connection for the job that is doing the crawling.
+ *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
+ * if two such strings are equal, nothing that affects how or whether the document is indexed will be different. This base implementation returns the empty string.
+ */
+ @Override
+ public String getPipelineDescription(OutputSpecification spec)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return "";
+ }
+
+ /** Add (or replace) a document in the output data store using the connector.
+ * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+ * necessary.
+ * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
+ * output description, since that was what was partly used to determine if output should be taking place. So it may be necessary for this method to decode
+ * an output description string in order to determine what should be done.
+ *@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
+ * and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
+ *@param pipelineDescription is the description string that was constructed for this document by the getPipelineDescription() method.
+ *@param document is the document data to be processed (handed to the output data store).
+ *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document. May be null.
+ *@param activities is the handle to an object that the implementer of a pipeline connector may use to perform operations, such as logging processing activity.
+ *@return the document status (accepted or permanently rejected). This base implementation always returns DOCUMENTSTATUS_REJECTED.
+ */
+ @Override
+ public int addOrReplaceDocument(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return DOCUMENTSTATUS_REJECTED;
+ }
+
+ // UI support methods.
+ //
+ // These support methods come in two varieties. The first bunch is involved in setting up connection configuration information. The second bunch
+ // is involved in presenting and editing output specification information for a job. The two kinds of methods are accordingly treated differently,
+ // in that the first bunch cannot assume that the current connector object is connected, while the second bunch can. That is why the first bunch
+ // receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
+ // method, which is not declared in this class). Only specification (second-bunch) methods are defined below.
+
+ /** Output the specification header section. This base implementation outputs nothing.
+ * This method is called in the head section of a job page which has selected an output connection of the current type. Its purpose is to add the required tabs
+ * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+ *@param out is the output to which any HTML should be sent.
+ *@param locale is the preferred locale of the output.
+ *@param os is the current output specification for this job.
+ *@param connectionSequenceNumber is the unique number of this connection within the job.
+ *@param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector.
+ */
+ @Override
+ public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os,
+ int connectionSequenceNumber, List<String> tabsArray)
+ throws ManifoldCFException, IOException
+ {
+ }
+
+ /** Output the specification body section. This base implementation outputs nothing.
+ * This method is called in the body section of a job page which has selected an output connection of the current type. Its purpose is to present the required form elements for editing.
+ * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags. The name of the
+ * form is "editjob".
+ *@param out is the output to which any HTML should be sent.
+ *@param locale is the preferred locale of the output.
+ *@param os is the current output specification for this job.
+ *@param connectionSequenceNumber is the unique number of this connection within the job.
+ *@param actualSequenceNumber is the connection within the job that has currently been selected.
+ *@param tabName is the current tab name.
+ */
+ @Override
+ public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os,
+ int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+ throws ManifoldCFException, IOException
+ {
+ }
+
+ /** Process a specification post.
+ * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
+ * posted. Its purpose is to gather form information and modify the output specification accordingly.
+ * The name of the posted form is "editjob".
+ *@param variableContext contains the post data, including binary file-upload information.
+ *@param locale is the preferred locale of the output.
+ *@param os is the current output specification for this job.
+ *@param connectionSequenceNumber is the unique number of this connection within the job.
+ *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page). This base implementation always returns null.
+ */
+ @Override
+ public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os,
+ int connectionSequenceNumber)
+ throws ManifoldCFException
+ {
+ return null;
+ }
+
+ /** View specification. This base implementation outputs nothing.
+ * This method is called in the body section of a job's view page. Its purpose is to present the output specification information to the user.
+ * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+ *@param out is the output to which any HTML should be sent.
+ *@param locale is the preferred locale of the output.
+ *@param os is the current output specification for this job.
+ *@param connectionSequenceNumber is the unique number of this connection within the job.
+ */
+ @Override
+ public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os,
+ int connectionSequenceNumber)
+ throws ManifoldCFException, IOException
+ {
+ }
+
+}
+
+
Propchange: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/branches/CONNECTORS-946/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
------------------------------------------------------------------------------
svn:keywords = Id