You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/06/20 11:16:13 UTC
svn commit: r1604110 - in /manifoldcf/trunk: ./ connectors/amazoncloudsearch/
connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/
connectors/amazoncloudsearch/connector/src/main/resources/org/apac...
Author: kwright
Date: Fri Jun 20 09:16:12 2014
New Revision: 1604110
URL: http://svn.apache.org/r1604110
Log:
Finish off CONNECTORS-954
Added:
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentChunkManager.java
- copied unchanged from r1604108, manifoldcf/branches/CONNECTORS-954/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentChunkManager.java
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentRecord.java
- copied unchanged from r1604108, manifoldcf/branches/CONNECTORS-954/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentRecord.java
manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/
- copied from r1604108, manifoldcf/branches/CONNECTORS-954/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/
Removed:
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html
Modified:
manifoldcf/trunk/ (props changed)
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/amazoncloudsearch/build.xml
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (contents, props changed)
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java (props changed)
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js
manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html
manifoldcf/trunk/connectors/amazoncloudsearch/pom.xml
Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-954:r1603693-1604108
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1604110&r1=1604109&r2=1604110&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Jun 20 09:16:12 2014
@@ -3,6 +3,13 @@ $Id$
======================= 1.7-dev =====================
+CONNECTORS-954: Revamp AmazonCloudSearch output connector completely.
+(1) Removed Tika and field mapping, since that would be done upstream in the
+pipeline.
+(2) Revamped the document lifecycle so that documents are batched together
+(which isn't perfect; see CONNECTORS-980).
+(Karl Wright, Takumi Yoshida)
+
CONNECTORS-971: Use a generic "seeding version string" to track the last
seeding event for every job. This abstracts away from time intervals and
permits seeding based on things like transaction IDs.
Modified: manifoldcf/trunk/connectors/amazoncloudsearch/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/amazoncloudsearch/build.xml?rev=1604110&r1=1604109&r2=1604110&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/amazoncloudsearch/build.xml (original)
+++ manifoldcf/trunk/connectors/amazoncloudsearch/build.xml Fri Jun 20 09:16:12 2014
@@ -37,42 +37,6 @@
<include name="jackson-core*.jar"/>
<include name="jackson-databind*.jar"/>
<include name="jackson-annotations*.jar"/>
- <include name="tika-core*.jar"/>
- <include name="tika-parsers*.jar"/>
- <include name="tagsoup*.jar"/>
- <include name="poi*.jar"/>
- <include name="vorbis-java-tika*.jar"/>
- <include name="vorbis-java-core*.jar"/>
- <include name="netcdf*.jar"/>
- <include name="apache-mime4j-core*.jar"/>
- <include name="apache-mime4j-dom*.jar"/>
- <include name="commons-compress*.jar"/>
- <include name="commons-codec*.jar"/>
- <include name="pdfbox*.jar"/>
- <include name="fontbox*.jar"/>
- <include name="jempbox*.jar"/>
- <include name="commons-logging*.jar"/>
- <include name="bcmail-jdk15*.jar"/>
- <include name="bcprov-jdk15*.jar"/>
- <include name="poi-scratchpad*.jar"/>
- <include name="poi-ooxml*.jar"/>
- <include name="poi-ooxml-schemas*.jar"/>
- <include name="xmlbeans*.jar"/>
- <include name="dom4j*.jar"/>
- <include name="geronimo-stax-api_1.0_spec*.jar"/>
- <include name="asm-debug-all*.jar"/>
- <include name="isoparser*.jar"/>
- <include name="aspectjrt*.jar"/>
- <include name="metadata-extractor*.jar"/>
- <include name="xmpcore*.jar"/>
- <include name="xml-apis*.jar"/>
- <include name="boilerpipe*.jar"/>
- <include name="rome*.jar"/>
- <include name="jdom*.jar"/>
- <include name="xercesImpl*.jar"/>
- <include name="vorbis-java-core*.jar"/>
- <include name="juniversalchardet*.jar"/>
- <include name="jhighlight*.jar"/>
</fileset>
</path>
@@ -84,42 +48,6 @@
<include name="jackson-core*.jar"/>
<include name="jackson-databind*.jar"/>
<include name="jackson-annotations*.jar"/>
- <include name="tika-core*.jar"/>
- <include name="tika-parsers*.jar"/>
- <include name="tagsoup*.jar"/>
- <include name="poi*.jar"/>
- <include name="vorbis-java-tika*.jar"/>
- <include name="vorbis-java-core*.jar"/>
- <include name="netcdf*.jar"/>
- <include name="apache-mime4j-core*.jar"/>
- <include name="apache-mime4j-dom*.jar"/>
- <include name="commons-compress*.jar"/>
- <include name="commons-codec*.jar"/>
- <include name="pdfbox*.jar"/>
- <include name="fontbox*.jar"/>
- <include name="jempbox*.jar"/>
- <include name="commons-logging*.jar"/>
- <include name="bcmail-jdk15*.jar"/>
- <include name="bcprov-jdk15*.jar"/>
- <include name="poi-scratchpad*.jar"/>
- <include name="poi-ooxml*.jar"/>
- <include name="poi-ooxml-schemas*.jar"/>
- <include name="xmlbeans*.jar"/>
- <include name="dom4j*.jar"/>
- <include name="geronimo-stax-api_1.0_spec*.jar"/>
- <include name="asm-debug-all*.jar"/>
- <include name="isoparser*.jar"/>
- <include name="aspectjrt*.jar"/>
- <include name="metadata-extractor*.jar"/>
- <include name="xmpcore*.jar"/>
- <include name="xml-apis*.jar"/>
- <include name="boilerpipe*.jar"/>
- <include name="rome*.jar"/>
- <include name="jdom*.jar"/>
- <include name="xercesImpl*.jar"/>
- <include name="vorbis-java-core*.jar"/>
- <include name="juniversalchardet*.jar"/>
- <include name="jhighlight*.jar"/>
</fileset>
</copy>
</target>
Modified: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java?rev=1604110&r1=1604109&r2=1604110&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java (original)
+++ manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java Fri Jun 20 09:16:12 2014
@@ -17,58 +17,59 @@
* limitations under the License.
*/
package org.apache.manifoldcf.agents.output.amazoncloudsearch;
-
-import java.io.IOException;
-import java.io.InputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.StringReader;
-import java.io.BufferedReader;
-import java.util.ArrayList;
-import java.util.HashMap;
+import java.io.BufferedReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashMap;
import java.util.Iterator;
-import java.util.List;
+import java.util.List;
import java.util.Map;
import java.util.Locale;
import java.util.Set;
import java.util.HashSet;
+import java.util.Date;
+
+import org.apache.commons.io.input.ReaderInputStream;
import org.apache.commons.io.FilenameUtils;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.ClientProtocolException;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.entity.StringEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
+import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.agents.output.BaseOutputConnector;
import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
import org.apache.manifoldcf.core.interfaces.Specification;
-import org.apache.manifoldcf.core.interfaces.ConfigParams;
+import org.apache.manifoldcf.core.interfaces.ConfigParams;
import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
-import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.DBInterfaceFactory;
+import org.apache.manifoldcf.core.interfaces.IDBInterface;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.IThreadContext;
import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
import org.apache.manifoldcf.core.interfaces.IPostParameters;
import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity;
import org.apache.manifoldcf.core.interfaces.SpecificationNode;
-import org.apache.manifoldcf.core.system.ManifoldCF;
-import org.apache.manifoldcf.crawler.system.Logging;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
+import org.apache.manifoldcf.agents.system.ManifoldCF;
+import org.apache.manifoldcf.agents.system.Logging;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParseException;
@@ -79,6 +80,8 @@ import com.fasterxml.jackson.databind.Js
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
+import org.apache.manifoldcf.core.jsongen.*;
+
public class AmazonCloudSearchConnector extends BaseOutputConnector {
/** Ingestion activity */
@@ -99,13 +102,19 @@ public class AmazonCloudSearchConnector
private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
private static final String EDIT_SPECIFICATION_CONTENTS_HTML = "editSpecification_Contents.html";
- private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
/** Local connection */
protected HttpPost poster = null;
+ // What we need for database keys
+ protected String serverHost = null;
+ protected String serverPath = null;
+
+ /** Document Chunk Manager */
+ private DocumentChunkManager documentChunkManager = null;
+
/** cloudsearch field name for file body text. */
private static final String FILE_BODY_TEXT_FIELDNAME = "f_bodytext";
@@ -114,6 +123,42 @@ public class AmazonCloudSearchConnector
public AmazonCloudSearchConnector(){
}
+ /** Clear out any state information specific to a given thread.
+ * This method is called when this object is returned to the connection pool.
+ */
+ @Override
+ public void clearThreadContext()
+ {
+ super.clearThreadContext();
+ documentChunkManager = null;
+ }
+
+ @Override
+ public void install(IThreadContext threadContext)
+ throws ManifoldCFException
+ {
+ IDBInterface mainDatabase = DBInterfaceFactory.make(threadContext,
+ ManifoldCF.getMasterDatabaseName(),
+ ManifoldCF.getMasterDatabaseUsername(),
+ ManifoldCF.getMasterDatabasePassword());
+
+ DocumentChunkManager dcmanager = new DocumentChunkManager(mainDatabase);
+ dcmanager.install();
+ }
+
+ @Override
+ public void deinstall(IThreadContext threadContext)
+ throws ManifoldCFException
+ {
+ IDBInterface mainDatabase = DBInterfaceFactory.make(threadContext,
+ ManifoldCF.getMasterDatabaseName(),
+ ManifoldCF.getMasterDatabaseUsername(),
+ ManifoldCF.getMasterDatabasePassword());
+
+ DocumentChunkManager dcmanager = new DocumentChunkManager(mainDatabase);
+ dcmanager.deinstall();
+ }
+
/** Return the list of activities that this connector supports (i.e. writes into the log).
*@return the list.
*/
@@ -150,10 +195,9 @@ public class AmazonCloudSearchConnector
public void disconnect()
throws ManifoldCFException
{
- if (poster != null)
- {
- poster = null;
- }
+ serverHost = null;
+ serverPath = null;
+ poster = null;
super.disconnect();
}
@@ -161,10 +205,19 @@ public class AmazonCloudSearchConnector
protected void getSession()
throws ManifoldCFException
{
- String serverHost = params.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
+ if (documentChunkManager == null)
+ {
+ IDBInterface databaseHandle = DBInterfaceFactory.make(currentContext,
+ ManifoldCF.getMasterDatabaseName(),
+ ManifoldCF.getMasterDatabaseUsername(),
+ ManifoldCF.getMasterDatabasePassword());
+ documentChunkManager = new DocumentChunkManager(databaseHandle);
+ }
+
+ serverHost = params.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
if (serverHost == null)
throw new ManifoldCFException("Server host parameter required");
- String serverPath = params.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
+ serverPath = params.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
if (serverPath == null)
throw new ManifoldCFException("Server path parameter required");
String proxyProtocol = params.getParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL);
@@ -193,7 +246,7 @@ public class AmazonCloudSearchConnector
poster.addHeader("Content-Type", "application/json");
}
-
+
/** Test the connection. Returns a string describing the connection integrity.
*@return the connection's status as a displayable string.
*/
@@ -201,14 +254,14 @@ public class AmazonCloudSearchConnector
public String check() throws ManifoldCFException {
try {
getSession();
- String responsbody = postData("[]");
+ String responsbody = postData(new ReaderInputStream(new StringReader("[]"),Consts.UTF_8));
String status = "";
try
{
status = getStatusFromJsonResponse(responsbody);
} catch (ManifoldCFException e)
{
- Logging.connectors.debug(e);
+ Logging.ingest.debug(e);
return "Could not get status from response body. Check Access Policy setting of your domain of Amazon CloudSearch.: " + e.getMessage();
}
@@ -231,45 +284,45 @@ public class AmazonCloudSearchConnector
return "Connection NOT working.";
} catch (ClientProtocolException e) {
- Logging.connectors.debug(e);
+ Logging.ingest.debug(e);
return "Protocol exception: "+e.getMessage();
} catch (IOException e) {
- Logging.connectors.debug(e);
+ Logging.ingest.debug(e);
return "IO exception: "+e.getMessage();
} catch (ServiceInterruption e) {
- Logging.connectors.debug(e);
+ Logging.ingest.debug(e);
return "Transient exception: "+e.getMessage();
}
- }
-
- private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
- try {
- JsonParser parser = new JsonFactory().createJsonParser(responsbody);
- while (parser.nextToken() != JsonToken.END_OBJECT)
- {
- String name = parser.getCurrentName();
- if("status".equalsIgnoreCase(name)){
- parser.nextToken();
- return parser.getText();
- }
- }
- } catch (JsonParseException e) {
- throw new ManifoldCFException(e);
- } catch (IOException e) {
- throw new ManifoldCFException(e);
- }
- return null;
- }
-
- private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
- while(parser.nextToken() != JsonToken.END_ARRAY){
- String name = parser.getCurrentName();
- if("message".equalsIgnoreCase(name)){
- parser.nextToken();
- return parser.getText();
- }
- }
- return null;
+ }
+
+ private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
+ try {
+ JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+ while (parser.nextToken() != JsonToken.END_OBJECT)
+ {
+ String name = parser.getCurrentName();
+ if("status".equalsIgnoreCase(name)){
+ parser.nextToken();
+ return parser.getText();
+ }
+ }
+ } catch (JsonParseException e) {
+ throw new ManifoldCFException(e);
+ } catch (IOException e) {
+ throw new ManifoldCFException(e);
+ }
+ return null;
+ }
+
+ private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
+ while(parser.nextToken() != JsonToken.END_ARRAY){
+ String name = parser.getCurrentName();
+ if("message".equalsIgnoreCase(name)){
+ parser.nextToken();
+ return parser.getText();
+ }
+ }
+ return null;
}
/** Get an output version string, given an output specification. The output version string is used to uniquely describe the pertinent details of
@@ -342,118 +395,69 @@ public class AmazonCloudSearchConnector
*@return the document status (accepted or permanently rejected).
*/
@Override
- public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
- throws ManifoldCFException, ServiceInterruption
- {
- // Establish a session
+ public int addOrReplaceDocumentWithException(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ throws ManifoldCFException, ServiceInterruption, IOException
+ {
+ // Establish a session
getSession();
SpecPacker sp = new SpecPacker(outputDescription);
- String jsondata = "";
- try {
- //build json..
- SDFModel model = new SDFModel();
- Document doc = model.new Document();
- doc.setType("add");
- doc.setId(ManifoldCF.hash(documentURI));
-
- HashMap fields = new HashMap();
- Metadata metadata = extractBinaryFile(document, fields);
-
- Iterator<String> itr = document.getFields();
- while(itr.hasNext())
+ String uid = ManifoldCF.hash(documentURI);
+
+ // Build a JSON generator
+ JSONObjectReader objectReader = new JSONObjectReader();
+ // Build the metadata field part
+ JSONObjectReader fieldReader = new JSONObjectReader();
+ // Add the type and ID
+ objectReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader("id"),new JSONStringReader(uid)))
+ .addNameValuePair(new JSONNameValueReader(new JSONStringReader("type"),new JSONStringReader("add")))
+ .addNameValuePair(new JSONNameValueReader(new JSONStringReader("fields"),fieldReader));
+
+ // Populate the fields...
+ Iterator<String> itr = document.getFields();
+ while (itr.hasNext())
+ {
+ String fieldName = itr.next();
+ Object[] fieldValues = document.getField(fieldName);
+ JSONReader[] elements = new JSONReader[fieldValues.length];
+ if (fieldValues instanceof Reader[])
{
- String fName = itr.next();
- Object[] value = document.getField(fName);
- String target = sp.getMapping(fName);
- if(target!=null)
- {
- fields.put(target, value);
- }
- else
+ for (int i = 0; i < elements.length; i++)
{
- if(sp.keepAllMetadata())
- {
- fields.put(fName, value);
- }
+ elements[i] = new JSONStringReader((Reader)fieldValues[i]);
}
}
-
- //metadata of binary files.
- String[] metaNames = metadata.names();
- for(String mName : metaNames){
- String value = metadata.get(mName);
- String target = sp.getMapping(mName);
- if(target!=null)
+ else if (fieldValues instanceof Date[])
+ {
+ for (int i = 0; i < elements.length; i++)
{
- fields.put(target, value);
+ elements[i] = new JSONStringReader(((Date)fieldValues[i]).toString());
}
- else
+ }
+ else if (fieldValues instanceof String[])
+ {
+ for (int i = 0; i < elements.length; i++)
{
- if(sp.keepAllMetadata())
- {
- fields.put(mName, value);
- }
+ elements[i] = new JSONStringReader((String)fieldValues[i]);
}
}
- doc.setFields(fields);
- model.addDocument(doc);
-
- //generate json data.
- jsondata = model.toJSON();
- }
- catch (SAXException e) {
- // if document data could not be converted to JSON by jackson.
- Logging.connectors.debug(e);
- throw new ManifoldCFException(e);
- } catch (JsonProcessingException e) {
- // if document data could not be converted to JSON by jackson.
- Logging.connectors.debug(e);
- throw new ManifoldCFException(e);
- } catch (TikaException e) {
- // if document could not be parsed by tika.
- Logging.connectors.debug(e);
- return DOCUMENTSTATUS_REJECTED;
- } catch (IOException e) {
- // if document data could not be read when the document parsing by tika.
- Logging.connectors.debug(e);
- throw new ManifoldCFException(e);
- }
-
- //post data..
- String responsbody = postData(jsondata);
-
- // check status
- String status = getStatusFromJsonResponse(responsbody);
- if("success".equals(status))
- {
- activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);
- return DOCUMENTSTATUS_ACCEPTED;
- }
- else {
- throw new ManifoldCFException("recieved error status from service after feeding document. response body : " + responsbody);
- }
- }
-
- private Metadata extractBinaryFile(RepositoryDocument document, HashMap fields)
- throws IOException, SAXException, TikaException {
-
- //extract body text and metadata fields from binary file.
- InputStream is = document.getBinaryStream();
- Parser parser = new AutoDetectParser();
- ContentHandler handler = new BodyContentHandler();
- Metadata metadata = new Metadata();
- parser.parse(is, handler, metadata, new ParseContext());
- String bodyStr = handler.toString();
- if(bodyStr != null){
- bodyStr = handler.toString().replaceAll("\\n", "").replaceAll("\\t", "");
- fields.put(FILE_BODY_TEXT_FIELDNAME, bodyStr);
- }
- return metadata;
- }
-
- /** Remove a document using the connector.
+ else
+ throw new IllegalStateException("Unexpected metadata type: "+fieldValues.getClass().getName());
+
+ fieldReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader(fieldName),new JSONArrayReader(elements)));
+ }
+
+ // Add the primary content data in.
+ fieldReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader(FILE_BODY_TEXT_FIELDNAME),
+ new JSONStringReader(new InputStreamReader(document.getBinaryStream(),Consts.UTF_8))));
+
+ documentChunkManager.recordDocument(uid, serverHost, serverPath, new ReaderInputStream(objectReader, Consts.UTF_8));
+ conditionallyFlushDocuments();
+ return DOCUMENTSTATUS_ACCEPTED;
+ }
+
+ /** Remove a document using the connector.
* Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
*@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
* and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
@@ -467,29 +471,107 @@ public class AmazonCloudSearchConnector
// Establish a session
getSession();
- String jsonData = "";
- try {
- SDFModel model = new SDFModel();
- SDFModel.Document doc = model.new Document();
- doc.setType("delete");
- doc.setId(documentURI);
- model.addDocument(doc);
- jsonData = model.toJSON();
- } catch (JsonProcessingException e) {
- throw new ManifoldCFException(e);
- }
- String responsbody = postData(jsonData);
-
- // check status
- String status = getStatusFromJsonResponse(responsbody);
- if("success".equals(status))
- {
- activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
- }
- else {
- throw new ManifoldCFException("recieved error status from service after feeding document.");
- }
- }
+ String uid = ManifoldCF.hash(documentURI);
+
+ // Build a JSON generator
+ JSONObjectReader objectReader = new JSONObjectReader();
+ // Add the type and ID
+ objectReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader("id"),new JSONStringReader(uid)))
+ .addNameValuePair(new JSONNameValueReader(new JSONStringReader("type"),new JSONStringReader("delete")));
+
+ try
+ {
+ documentChunkManager.recordDocument(uid, serverHost, serverPath, new ReaderInputStream(objectReader, Consts.UTF_8));
+ }
+ catch (IOException e)
+ {
+ handleIOException(e);
+ }
+ conditionallyFlushDocuments();
+ }
+
+ @Override
+ public void noteJobComplete(IOutputNotifyActivity activities)
+ throws ManifoldCFException, ServiceInterruption {
+ getSession();
+ flushDocuments();
+ }
+
+ protected static final int CHUNK_SIZE = 1000;
+
+ protected void conditionallyFlushDocuments()
+ throws ManifoldCFException, ServiceInterruption
+ {
+ if (documentChunkManager.equalOrMoreThan(serverHost, serverPath, CHUNK_SIZE))
+ flushDocuments();
+ }
+
+ protected void flushDocuments()
+ throws ManifoldCFException, ServiceInterruption
+ {
+ Logging.ingest.info("AmazonCloudSearch: Starting flush to Amazon");
+
+ // Repeat until we are empty of cached stuff
+ int chunkNumber = 0;
+ while (true)
+ {
+ DocumentRecord[] records = documentChunkManager.readChunk(serverHost, serverPath, CHUNK_SIZE);
+ try
+ {
+ if (records.length == 0)
+ break;
+ // The records consist of up to 1000 individual input streams, which must be all concatenated together into the post
+ // To do that, we go into and out of Reader space once again...
+ JSONArrayReader arrayReader = new JSONArrayReader();
+ for (DocumentRecord dr : records)
+ {
+ arrayReader.addArrayElement(new JSONValueReader(new InputStreamReader(dr.getDataStream(),Consts.UTF_8)));
+ }
+
+ //post data..
+ String responsbody = postData(new ReaderInputStream(arrayReader,Consts.UTF_8));
+ // check status
+ String status = getStatusFromJsonResponse(responsbody);
+ if("success".equals(status))
+ {
+ Logging.ingest.info("AmazonCloudSearch: Successfully sent document chunk " + chunkNumber);
+ //remove documents from table..
+ documentChunkManager.deleteChunk(records);
+ }
+ else
+ {
+ Logging.ingest.error("AmazonCloudSearch: Error sending document chunk "+ chunkNumber+": "+ responsbody);
+ throw new ManifoldCFException("recieved error status from service after feeding document. response body : " + responsbody);
+ }
+ }
+ finally
+ {
+ Throwable exception = null;
+ for (DocumentRecord dr : records)
+ {
+ try
+ {
+ dr.close();
+ }
+ catch (Throwable e)
+ {
+ exception = e;
+ }
+ }
+ if (exception != null)
+ {
+ if (exception instanceof ManifoldCFException)
+ throw (ManifoldCFException)exception;
+ else if (exception instanceof Error)
+ throw (Error)exception;
+ else if (exception instanceof RuntimeException)
+ throw (RuntimeException)exception;
+ else
+ throw new RuntimeException("Unknown exception class thrown: "+exception.getClass().getName()+": "+exception.getMessage(),exception);
+ }
+ }
+ }
+ }
/**
* Fill in a Server tab configuration parameter map for calling a Velocity
@@ -635,75 +717,41 @@ public class AmazonCloudSearchConnector
return null;
}
-
- private String postData(String jsonData) throws ServiceInterruption, ManifoldCFException {
- CloseableHttpClient httpclient = HttpClients.createDefault();
- try {
- poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
+
+ private String postData(InputStream jsonData) throws ServiceInterruption, ManifoldCFException {
+ CloseableHttpClient httpclient = HttpClients.createDefault();
+ try {
+ poster.setEntity(new InputStreamEntity(jsonData));
HttpResponse res = httpclient.execute(poster);
-
- HttpEntity resEntity = res.getEntity();
- return EntityUtils.toString(resEntity);
- } catch (ClientProtocolException e) {
- throw new ManifoldCFException(e);
- } catch (IOException e) {
- handleIOException(e);
- } finally {
- try {
- httpclient.close();
+
+ HttpEntity resEntity = res.getEntity();
+ return EntityUtils.toString(resEntity);
+ } catch (ClientProtocolException e) {
+ throw new ManifoldCFException(e);
+ } catch (IOException e) {
+ handleIOException(e);
+ } finally {
+ try {
+ httpclient.close();
} catch (IOException e) {
- //do nothing
- }
- }
- return null;
- }
-
- private static void handleIOException(IOException e)
- throws ManifoldCFException, ServiceInterruption {
- if (!(e instanceof java.net.SocketTimeoutException)
- && (e instanceof InterruptedIOException)) {
- throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
- ManifoldCFException.INTERRUPTED);
- }
- Logging.connectors.warn(
- "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
- long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
- currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
- }
-
- protected static void fillInFieldMappingSpecificationMap(Map<String,Object> paramMap, Specification os)
- {
- // Prep for field mappings
- List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
- String keepAllMetadataValue = "true";
- for (int i = 0; i < os.getChildCount(); i++)
- {
- SpecificationNode sn = os.getChild(i);
- if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
- String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
- String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
- String targetDisplay;
- if (target == null)
- {
- target = "";
- targetDisplay = "(remove)";
- }
- else
- targetDisplay = target;
- Map<String,String> fieldMapping = new HashMap<String,String>();
- fieldMapping.put("SOURCE",source);
- fieldMapping.put("TARGET",target);
- fieldMapping.put("TARGETDISPLAY",targetDisplay);
- fieldMappings.add(fieldMapping);
- }
- else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
- {
- keepAllMetadataValue = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+ //do nothing
}
}
- paramMap.put("FIELDMAPPINGS",fieldMappings);
- paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
+ return null;
+ }
+
+ private static void handleIOException(IOException e)
+ throws ManifoldCFException, ServiceInterruption {
+ if (!(e instanceof java.net.SocketTimeoutException)
+ && (e instanceof InterruptedIOException)) {
+ throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ }
+ Logging.ingest.warn(
+ "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+ currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
}
protected static void fillInContentsSpecificationMap(Map<String,Object> paramMap, Specification os)
@@ -763,11 +811,9 @@ public class AmazonCloudSearchConnector
Map<String, Object> paramMap = new HashMap<String, Object>();
paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
- tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.FieldMappingTabName"));
tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.ContentsTabName"));
// Fill in the specification header map, using data from all tabs.
- fillInFieldMappingSpecificationMap(paramMap, os);
fillInContentsSpecificationMap(paramMap, os);
Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_JS,paramMap);
@@ -797,10 +843,8 @@ public class AmazonCloudSearchConnector
paramMap.put("SELECTEDNUM",Integer.toString(actualSequenceNumber));
// Fill in the field mapping tab data
- fillInFieldMappingSpecificationMap(paramMap, os);
fillInContentsSpecificationMap(paramMap, os);
Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_CONTENTS_HTML,paramMap);
- Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_FIELDMAPPING_HTML,paramMap);
}
/** Process a specification post.
@@ -872,69 +916,6 @@ public class AmazonCloudSearchConnector
os.addChild(os.getChildCount(),sn);
}
- x = variableContext.getParameter(seqPrefix+"cloudsearch_fieldmapping_count");
- if (x != null && x.length() > 0)
- {
- // About to gather the fieldmapping nodes, so get rid of the old ones.
- int i = 0;
- while (i < os.getChildCount())
- {
- SpecificationNode node = os.getChild(i);
- if (node.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP) || node.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
- os.removeChild(i);
- else
- i++;
- }
- int count = Integer.parseInt(x);
- i = 0;
- while (i < count)
- {
- String prefix = seqPrefix+"cloudsearch_fieldmapping_";
- String suffix = "_"+Integer.toString(i);
- String op = variableContext.getParameter(prefix+"op"+suffix);
- if (op == null || !op.equals("Delete"))
- {
- // Gather the fieldmap etc.
- String source = variableContext.getParameter(prefix+"source"+suffix);
- String target = variableContext.getParameter(prefix+"target"+suffix);
- if (target == null)
- target = "";
- SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
- node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
- node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
- os.addChild(os.getChildCount(),node);
- }
- i++;
- }
-
- String addop = variableContext.getParameter(seqPrefix+"cloudsearch_fieldmapping_op");
- if (addop != null && addop.equals("Add"))
- {
- String source = variableContext.getParameter(seqPrefix+"cloudsearch_fieldmapping_source");
- String target = variableContext.getParameter(seqPrefix+"cloudsearch_fieldmapping_target");
- if (target == null)
- target = "";
- SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
- node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
- node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
- os.addChild(os.getChildCount(),node);
- }
-
- // Gather the keep all metadata parameter to be the last one
- SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_KEEPMETADATA);
- String keepAll = variableContext.getParameter(seqPrefix+"cloudsearch_keepallmetadata");
- if (keepAll != null)
- {
- node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, keepAll);
- }
- else
- {
- node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, "false");
- }
- // Add the new keepallmetadata config parameter
- os.addChild(os.getChildCount(), node);
- }
-
return null;
}
@@ -956,7 +937,6 @@ public class AmazonCloudSearchConnector
paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
// Fill in the map with data from all tabs
- fillInFieldMappingSpecificationMap(paramMap, os);
fillInContentsSpecificationMap(paramMap, os);
Messages.outputResourceWithVelocity(out,locale,VIEW_SPECIFICATION_HTML,paramMap);
@@ -985,32 +965,18 @@ public class AmazonCloudSearchConnector
protected static class SpecPacker {
- private final Map<String,String> sourceTargets = new HashMap<String,String>();
- private final boolean keepAllMetadata;
private final Set<String> extensions = new HashSet<String>();
private final Set<String> mimeTypes = new HashSet<String>();
private final Long lengthCutoff;
public SpecPacker(Specification os) {
- boolean keepAllMetadata = true;
Long lengthCutoff = null;
String extensions = null;
String mimeTypes = null;
for (int i = 0; i < os.getChildCount(); i++) {
SpecificationNode sn = os.getChild(i);
- if(sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA)) {
- String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
- keepAllMetadata = Boolean.parseBoolean(value);
- } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
- String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
- String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
-
- if (target == null) {
- target = "";
- }
- sourceTargets.put(source, target);
- } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
+ if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
mimeTypes = sn.getValue();
} else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS)) {
extensions = sn.getValue();
@@ -1019,7 +985,6 @@ public class AmazonCloudSearchConnector
lengthCutoff = new Long(value);
}
}
- this.keepAllMetadata = keepAllMetadata;
this.lengthCutoff = lengthCutoff;
fillSet(this.extensions, extensions);
fillSet(this.mimeTypes, mimeTypes);
@@ -1029,21 +994,6 @@ public class AmazonCloudSearchConnector
int index = 0;
- // Mappings
- final List<String> packedMappings = new ArrayList<String>();
- index = unpackList(packedMappings,packedString,index,'+');
- String[] fixedList = new String[2];
- for (String packedMapping : packedMappings) {
- unpackFixedList(fixedList,packedMapping,0,':');
- sourceTargets.put(fixedList[0], fixedList[1]);
- }
-
- // Keep all metadata
- if (packedString.length() > index)
- keepAllMetadata = (packedString.charAt(index++) == '+');
- else
- keepAllMetadata = true;
-
// Max length
final StringBuilder sb = new StringBuilder();
if (packedString.length() > index) {
@@ -1074,32 +1024,6 @@ public class AmazonCloudSearchConnector
StringBuilder sb = new StringBuilder();
int i;
- // Mappings
- final String[] sortArray = new String[sourceTargets.size()];
- i = 0;
- for (String source : sourceTargets.keySet()) {
- sortArray[i++] = source;
- }
- java.util.Arrays.sort(sortArray);
-
- List<String> packedMappings = new ArrayList<String>();
- String[] fixedList = new String[2];
- for (String source : sortArray) {
- String target = sourceTargets.get(source);
- StringBuilder localBuffer = new StringBuilder();
- fixedList[0] = source;
- fixedList[1] = target;
- packFixedList(localBuffer,fixedList,':');
- packedMappings.add(localBuffer.toString());
- }
- packList(sb,packedMappings,'+');
-
- // Keep all metadata
- if (keepAllMetadata)
- sb.append('+');
- else
- sb.append('-');
-
// Max length
if (lengthCutoff == null)
sb.append('-');
@@ -1148,13 +1072,6 @@ public class AmazonCloudSearchConnector
return extensions.contains(extension);
}
- public String getMapping(String source) {
- return sourceTargets.get(source);
- }
-
- public boolean keepAllMetadata() {
- return keepAllMetadata;
- }
}
-
+
}
Propchange: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
------------------------------------------------------------------------------
svn:keywords = Id
Propchange: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java
------------------------------------------------------------------------------
svn:keywords = Id
Modified: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js?rev=1604110&r1=1604109&r2=1604110&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js (original)
+++ manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js Fri Jun 20 09:16:12 2014
@@ -22,30 +22,5 @@ function s${SEQNUM}_checkSpecification()
return true;
}
-function s${SEQNUM}_addFieldMapping()
-{
- if (editjob.s${SEQNUM}_cloudsearch_fieldmapping_source.value == "")
- {
- alert("$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))");
- editjob.s${SEQNUM}_cloudsearch_fieldmapping_source.focus();
- return;
- }
- editjob.s${SEQNUM}_cloudsearch_fieldmapping_op.value="Add";
- postFormSetAnchor("s${SEQNUM}_cloudsearch_fieldmapping");
-}
-
-function s${SEQNUM}_deleteFieldMapping(i)
-{
- // Set the operation
- eval("editjob.s${SEQNUM}_cloudsearch_fieldmapping_op_"+i+".value=\"Delete\"");
- // Submit
- if (editjob.s${SEQNUM}_cloudsearch_fieldmapping_count.value==i)
- postFormSetAnchor("s${SEQNUM}_cloudsearch_fieldmapping");
- else
- postFormSetAnchor("s${SEQNUM}_cloudsearch_fieldmapping_"+i)
- // Undo, so we won't get two deletes next time
- eval("editjob.s${SEQNUM}_cloudsearch_fieldmapping_op_"+i+".value=\"Continue\"");
-}
-
//-->
</script>
Modified: manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html?rev=1604110&r1=1604109&r2=1604110&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html (original)
+++ manifoldcf/trunk/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html Fri Jun 20 09:16:12 2014
@@ -29,42 +29,4 @@
<td class="value">$Encoder.bodyEscape($EXTENSIONS)</td>
</tr>
- <tr><td class="separator" colspan="2"><hr/></td></tr>
-
- <tr>
- <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappings'))</nobr></td>
- <td class="boxcell">
- <table class="formtable">
- <tr class="formheaderrow">
- <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MetadataFieldName'))</nobr></td>
- <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.CloudSearchFieldName'))</nobr></td>
- </tr>
-#set($fieldcounter = 0)
-#foreach($fieldmapping in $FIELDMAPPINGS)
- #if(($fieldcounter % 2) == 0)
- <tr class="evenformrow">
- #else
- <tr class="oddformrow">
- #end
- <td class="formcolumncell">
- <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
- </td>
- <td class="formcolumncell">
- <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
- </td>
- </tr>
- #set($fieldcounter = $fieldcounter + 1)
-#end
-#if($fieldcounter == 0)
- <tr class="formrow"><td class="formmessage" colspan="2">$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))</td></tr>
-#end
- </table>
- </td>
- </tr>
- <tr><td class="separator" colspan="2"><hr/></td></tr>
- <tr>
- <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.KeepAllMetadata'))</nobr></td>
- <td class="value"><nobr>$Encoder.bodyEscape($KEEPALLMETADATA)</nobr></td>
- </tr>
-
</table>
Modified: manifoldcf/trunk/connectors/amazoncloudsearch/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/amazoncloudsearch/pom.xml?rev=1604110&r1=1604109&r2=1604110&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/amazoncloudsearch/pom.xml (original)
+++ manifoldcf/trunk/connectors/amazoncloudsearch/pom.xml Fri Jun 20 09:16:12 2014
@@ -199,11 +199,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>${project.groupId}</groupId>
- <artifactId>mcf-pull-agent</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpcomponent.httpclient.version}</version>
@@ -223,16 +218,6 @@
<artifactId>jackson-annotations</artifactId>
<version>2.3.0</version>
</dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- <version>1.5</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-parsers</artifactId>
- <version>1.5</version>
- </dependency>
<!-- Testing dependencies -->
@@ -260,6 +245,12 @@
<groupId>${project.groupId}</groupId>
<artifactId>mcf-pull-agent</artifactId>
<version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-pull-agent</artifactId>
+ <version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>