You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by rh...@apache.org on 2016/01/26 16:47:13 UTC

svn commit: r1726831 - in /manifoldcf/branches/CONNECTORS-1270/connectors: ./ opennlp/ opennlp/connector/ opennlp/connector/src/ opennlp/connector/src/main/ opennlp/connector/src/main/java/ opennlp/connector/src/main/java/org/ opennlp/connector/src/mai...

Author: rharo
Date: Tue Jan 26 15:47:12 2016
New Revision: 1726831

URL: http://svn.apache.org/viewvc?rev=1726831&view=rev
Log:
OpenNLP Transformation Connector Initial Import

Added:
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh
    manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml
Modified:
    manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md Tue Jan 26 15:47:12 2016
@@ -0,0 +1,42 @@
+# OpenNLP Transformation Connector for Apache ManifoldCF
+
+The OpenNLP connector extracts named entities (people, locations and organizations) from document content and attaches them as metadata (ner_people, ner_locations and ner_organizations) to the repository document.
+
+
+## Building the Connector
+---
+
+```
+git clone https://github.com/apache/manifoldcf.git
+cd manifoldcf/
+git checkout release-2.2-branch
+mvn clean install 
+
+git clone https://github.com/ChalithaUdara/OpenNLP-Manifold-Connector.git
+cd OpenNLP-Manifold-Connector
+mvn clean install -DskipTests=true
+```
+
+## Configure Connector with ManifoldCF
+---
+
+Copy mcf-opennlp-connector-2.2-jar-with-dependencies.jar to **$MANIFOLD_DIR/connectors-lib**.
+To register the connector with ManifoldCF, add the following to the **$MANIFOLD_DIR/connectors.xml** file.
+
+```
+<transformationconnector name="OpenNLP Extractor" class="org.apache.manifoldcf.agents.transformation.opennlp.OpenNlpExtractor" />
+```
+---
+
+In order to extract named entities with OpenNLP, you first need to download the required OpenNLP models. Run the **download-models.sh** script to download them.
+
+```
+sh download-models.sh
+```
+
+This will download the models to the nlpmodels directory.
+
+In the ManifoldCF job configuration, you need to configure the paths to the corresponding models.  
+
+
+

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java Tue Jan 26 15:47:12 2016
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+
+/**
+ * Localized message and resource lookup for the OpenNLP transformation connector.
+ * Every method is a static convenience wrapper that delegates to an overload inherited
+ * from org.apache.manifoldcf.ui.i18n.Messages, supplying this connector's bundle name,
+ * resource path and/or this class as the lookup anchor.
+ */
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+    // Bundle name passed to the inherited lookup methods when the caller does not name one.
+    public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
+    // Path name passed to the inherited resource-output methods.
+    public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
+    
+    /** Constructor - do not instantiate
+     */
+     protected Messages()
+     {
+     }
+     
+     // Lookups in the default bundle without message arguments (args is passed as null).
+
+     public static String getString(Locale locale, String messageKey)
+     {
+       return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     public static String getAttributeString(Locale locale, String messageKey)
+     {
+       return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     public static String getBodyString(Locale locale, String messageKey)
+     {
+       return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     public static String getAttributeJavascriptString(Locale locale, String messageKey)
+     {
+       return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     public static String getBodyJavascriptString(Locale locale, String messageKey)
+     {
+       return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+     }
+
+     // Lookups in the default bundle with message arguments.
+
+     public static String getString(Locale locale, String messageKey, Object[] args)
+     {
+       return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+
+     public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+     {
+       return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+     
+     public static String getBodyString(Locale locale, String messageKey, Object[] args)
+     {
+       return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+
+     public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+     {
+       return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+
+     public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+     {
+       return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+     }
+
+     // More general methods which allow bundlenames and class loaders to be specified.
+     // Each passes Messages.class to the inherited overload.
+     
+     public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+     {
+       return getString(Messages.class, bundleName, locale, messageKey, args);
+     }
+
+     public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+     {
+       return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+     }
+
+     public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+     {
+       return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+     }
+     
+     public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+     {
+       return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+     }
+
+     public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+     {
+       return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+     }
+
+     // Resource output
+     
+     /** Output the resource named by resourceKey under DEFAULT_PATH_NAME, delegating to the inherited
+      * outputResource with the given substitution parameters.
+      */
+     public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+       Map<String,String> substitutionParameters, boolean mapToUpperCase)
+       throws ManifoldCFException
+     {
+       outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+         substitutionParameters,mapToUpperCase);
+     }
+     
+     /** Velocity variant taking string substitution parameters; delegates to the inherited method
+      * with the default bundle and path names.
+      */
+     public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+       Map<String,String> substitutionParameters, boolean mapToUpperCase)
+       throws ManifoldCFException
+     {
+       outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+         substitutionParameters,mapToUpperCase);
+     }
+
+     /** Velocity variant taking arbitrary context objects; delegates to the inherited method
+      * with the default bundle and path names.
+      */
+     public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+       Map<String,Object> contextObjects)
+       throws ManifoldCFException
+     {
+       outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+         contextObjects);
+     }
+
+}

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java Tue Jan 26 15:47:12 2016
@@ -0,0 +1,513 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.agents.system.Logging;
+import org.apache.manifoldcf.agents.transformation.BaseTransformationConnector;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.IPostParameters;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.Specification;
+import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
+
+public class OpenNlpExtractor extends BaseTransformationConnector {
+	// Resource names handed to Messages.outputResourceWithVelocity by the UI methods
+	private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+	private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
+	private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+	// Meta-data fields added by this connector
+	private static final String PERSONS = "ner_people";
+	private static final String LOCATIONS = "ner_locations";
+	private static final String ORGANIZATIONS = "ner_organizations";
+
+	// Activity name; the only entry of activitiesList
+	protected static final String ACTIVITY_EXTRACT = "extract";
+
+	// Array returned as-is by getActivitiesList()
+	protected static final String[] activitiesList = new String[] { ACTIVITY_EXTRACT };
+
+	/**
+	 * Report the activity names this connector generates. The connector does
+	 * not have to be connected for this call.
+	 *
+	 * @return the shared activity-name array (currently only
+	 *         {@link #ACTIVITY_EXTRACT}).
+	 */
+	@Override
+	public String[] getActivitiesList() {
+		final String[] declaredActivities = activitiesList;
+		return declaredActivities;
+	}
+
+	/**
+	 * Produce the version context for a pipeline specification. The version
+	 * string is the packed form of the model paths found in the specification
+	 * (see {@link SpecPacker#toPackedString()}); the framework compares such
+	 * strings to decide whether a document has to be processed again. Document
+	 * content is never considered here, only the specification.
+	 *
+	 * @param os
+	 *            is the current pipeline specification object for this
+	 *            connection for the job that is doing the crawling.
+	 * @return a version context wrapping the packed version string, the
+	 *         connector parameters and the specification itself.
+	 */
+	@Override
+	public VersionContext getPipelineDescription(Specification os) throws ManifoldCFException, ServiceInterruption {
+		final String packedSpecification = new SpecPacker(os).toPackedString();
+		return new VersionContext(packedSpecification, params, os);
+	}
+
+	/**
+	 * Run OpenNLP named-entity recognition over the document content and pass
+	 * an enriched copy of the document to the next pipeline stage.
+	 *
+	 * The content is read fully into memory, decoded as UTF-8 text, split into
+	 * sentences and tokens, and scanned by the people, location and
+	 * organization name finders. The entities found are added to a duplicate
+	 * of the document as the fields {@code ner_people}, {@code ner_locations}
+	 * and {@code ner_organizations}; the buffered bytes are set as the binary
+	 * content of that duplicate. Empty documents are forwarded without the
+	 * entity fields.
+	 *
+	 * @param documentURI
+	 *            is the URI of the document, forwarded unchanged to the next
+	 *            stage.
+	 * @param pipelineDescription
+	 *            is the version context built by getPipelineDescription(); its
+	 *            specification supplies the five model paths.
+	 * @param document
+	 *            is the document data to be processed. Its binary stream is
+	 *            expected to contain plain text (e.g. produced by a preceding
+	 *            Tika extractor stage).
+	 * @param authorityNameString
+	 *            is the name of the authority responsible for authorizing any
+	 *            access tokens passed in with the repository document. May be
+	 *            null. Not used by this connector.
+	 * @param activities
+	 *            is the handle used to send the modified document to the next
+	 *            stage in the pipeline.
+	 * @return the document status returned by the next stage.
+	 * @throws IOException
+	 *             only if there's a stream error reading the document data.
+	 */
+	@Override
+	public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
+			RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+					throws ManifoldCFException, ServiceInterruption, IOException {
+		// assumes use of Tika extractor before using this connector
+		Logging.agents.debug("Starting OpenNlp extraction");
+
+		SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
+
+		// Buffer the content once; the same bytes are analysed here and then
+		// handed to the duplicate that is sent downstream.
+		byte[] bytes = IOUtils.toByteArray(document.getBinaryStream());
+
+		SentenceDetector sentenceDetector = OpenNlpExtractorConfig.sentenceDetector(sp.getSModelPath());
+		Tokenizer tokenizer = OpenNlpExtractorConfig.tokenizer(sp.getTModelPath());
+		NameFinderME peopleFinder = OpenNlpExtractorConfig.peopleFinder(sp.getPModelPath());
+		NameFinderME locationFinder = OpenNlpExtractorConfig.locationFinder(sp.getLModelPath());
+		NameFinderME organizationFinder = OpenNlpExtractorConfig.organizationFinder(sp.getOModelPath());
+
+		// create a duplicate and give it the buffered content
+		RepositoryDocument docCopy = document.duplicate();
+		docCopy.setBinary(new ByteArrayInputStream(bytes), bytes.length);
+
+		// Decide on the bytes actually read rather than on the declared length.
+		if (bytes.length > 0) {
+			String textContent = new String(bytes, StandardCharsets.UTF_8);
+			List<String> peopleList = new ArrayList<>();
+			List<String> locationsList = new ArrayList<>();
+			List<String> organizationsList = new ArrayList<>();
+
+			try {
+				for (String sentence : sentenceDetector.sentDetect(textContent)) {
+					String[] tokens = tokenizer.tokenize(sentence);
+
+					Span[] spans = peopleFinder.find(tokens);
+					peopleList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+					spans = locationFinder.find(tokens);
+					locationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+
+					spans = organizationFinder.find(tokens);
+					organizationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+				}
+			} finally {
+				// OpenNLP requires the adaptive data to be cleared after every
+				// document; otherwise state from this document influences the
+				// detection in the next one if a finder instance is reused.
+				peopleFinder.clearAdaptiveData();
+				locationFinder.clearAdaptiveData();
+				organizationFinder.clearAdaptiveData();
+			}
+
+			// add named entity meta-data
+			docCopy.addField(PERSONS, peopleList.toArray(new String[peopleList.size()]));
+			docCopy.addField(LOCATIONS, locationsList.toArray(new String[locationsList.size()]));
+			docCopy.addField(ORGANIZATIONS, organizationsList.toArray(new String[organizationsList.size()]));
+		}
+
+		return activities.sendDocument(documentURI, docCopy);
+	}
+
+	// ////////////////////////
+	// UI Methods
+	// ////////////////////////
+
+	/**
+	 * Name of the javascript function that checks the job form for this
+	 * connection.
+	 *
+	 * @param connectionSequenceNumber
+	 *            is the unique number of this connection within the job.
+	 * @return "s" + sequence number + "_checkSpecification".
+	 */
+	@Override
+	public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
+		final StringBuilder methodName = new StringBuilder("s");
+		methodName.append(connectionSequenceNumber);
+		methodName.append("_checkSpecification");
+		return methodName.toString();
+	}
+
+	/**
+	 * Name of the javascript function that checks the job form for this
+	 * connection right before it is saved.
+	 *
+	 * @param connectionSequenceNumber
+	 *            is the unique number of this connection within the job.
+	 * @return "s" + sequence number + "_checkSpecificationForSave".
+	 */
+	@Override
+	public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
+		final StringBuilder methodName = new StringBuilder("s");
+		methodName.append(connectionSequenceNumber);
+		methodName.append("_checkSpecificationForSave");
+		return methodName.toString();
+	}
+
+	/**
+	 * Emit the head-section content of the job editing page for this
+	 * connection: registers the connector's tab name and renders the
+	 * editSpecification.js template.
+	 *
+	 * @param out
+	 *            is the output to which any HTML should be sent.
+	 * @param locale
+	 *            is the preferred locale of the output.
+	 * @param os
+	 *            is the current specification for this job (not read here).
+	 * @param connectionSequenceNumber
+	 *            is the unique number of this connection within the job;
+	 *            exposed to the template as SEQNUM.
+	 * @param tabsArray
+	 *            is the list of tab names; the localized field-mapping tab
+	 *            name is appended to it.
+	 */
+	@Override
+	public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+			int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
+		final String fieldMappingTab = Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName");
+		tabsArray.add(fieldMappingTab);
+
+		final Map<String, Object> velocityContext = new HashMap<String, Object>();
+		velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+		Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, velocityContext);
+	}
+
+	/**
+	 * Emit the body-section form elements of the job editing page for this
+	 * connection by rendering the editSpecification_FieldMapping.html template
+	 * with the current model paths. The surrounding page supplies the
+	 * enclosing html, body and form ("editjob") tags.
+	 *
+	 * @param out
+	 *            is the output to which any HTML should be sent.
+	 * @param locale
+	 *            is the preferred locale of the output.
+	 * @param os
+	 *            is the current specification for this job; the model paths
+	 *            are read from it.
+	 * @param connectionSequenceNumber
+	 *            is the unique number of this connection within the job;
+	 *            exposed to the template as SEQNUM.
+	 * @param actualSequenceNumber
+	 *            is the connection within the job that has currently been
+	 *            selected; exposed to the template as SELECTEDNUM.
+	 * @param tabName
+	 *            is the current tab name; exposed to the template as TABNAME.
+	 */
+	@Override
+	public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber,
+			int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException {
+		final Map<String, Object> velocityContext = new HashMap<String, Object>();
+		velocityContext.put("TABNAME", tabName);
+		velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+		velocityContext.put("SELECTEDNUM", String.valueOf(actualSequenceNumber));
+		fillInFieldMappingSpecificationMap(velocityContext, os);
+
+		Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, velocityContext);
+	}
+
+	/**
+	 * Process a specification post. This method is called at the start of job's
+	 * edit or view page, whenever there is a possibility that form data for a
+	 * connection has been posted. For each of the five model-path form fields
+	 * (smodelpath, tmodelpath, pmodelpath, lmodelpath, omodelpath, each
+	 * prefixed with "s" + sequence number + "_") that is present in the post,
+	 * the existing specification nodes of that type are replaced by a single
+	 * node carrying the posted value. Fields that were not posted leave the
+	 * specification untouched, so a call without form data does not wipe the
+	 * stored configuration, and repeated posts do not accumulate duplicate
+	 * nodes.
+	 *
+	 * @param variableContext
+	 *            contains the post data, including binary file-upload
+	 *            information.
+	 * @param locale
+	 *            is the preferred locale of the output.
+	 * @param os
+	 *            is the current specification for this job; modified in place.
+	 * @param connectionSequenceNumber
+	 *            is the unique number of this connection within the job.
+	 * @return null if all is well, or a string error message if there is an
+	 *         error that should prevent saving of the job (and cause a
+	 *         redirection to an error page). This implementation always
+	 *         returns null.
+	 */
+	@Override
+	public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+			int connectionSequenceNumber) throws ManifoldCFException {
+		String seqPrefix = "s" + connectionSequenceNumber + "_";
+
+		replacePostedModelPath(os, OpenNlpExtractorConfig.NODE_SMODEL_PATH,
+				variableContext.getParameter(seqPrefix + "smodelpath"));
+		replacePostedModelPath(os, OpenNlpExtractorConfig.NODE_TMODEL_PATH,
+				variableContext.getParameter(seqPrefix + "tmodelpath"));
+		replacePostedModelPath(os, OpenNlpExtractorConfig.NODE_PMODEL_PATH,
+				variableContext.getParameter(seqPrefix + "pmodelpath"));
+		replacePostedModelPath(os, OpenNlpExtractorConfig.NODE_LMODEL_PATH,
+				variableContext.getParameter(seqPrefix + "lmodelpath"));
+		replacePostedModelPath(os, OpenNlpExtractorConfig.NODE_OMODEL_PATH,
+				variableContext.getParameter(seqPrefix + "omodelpath"));
+
+		return null;
+	}
+
+	/**
+	 * Replace all nodes of the given type with one node holding the posted
+	 * value; does nothing when the value was not posted (null).
+	 */
+	private static void replacePostedModelPath(Specification os, String nodeType, String postedValue) {
+		if (postedValue == null) {
+			return;
+		}
+
+		// Drop stale nodes of this type. The index only advances when nothing
+		// was removed, because removal shifts the following children down.
+		int i = 0;
+		while (i < os.getChildCount()) {
+			if (os.getChild(i).getType().equals(nodeType)) {
+				os.removeChild(i);
+			} else {
+				i++;
+			}
+		}
+
+		SpecificationNode node = new SpecificationNode(nodeType);
+		node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, postedValue);
+		os.addChild(os.getChildCount(), node);
+	}
+
+	/**
+	 * Render the read-only view of this connection's specification on a job's
+	 * view page, using the viewSpecification.html template and the model paths
+	 * stored in the specification. The surrounding page supplies the enclosing
+	 * html and body tags.
+	 *
+	 * @param out
+	 *            is the output to which any HTML should be sent.
+	 * @param locale
+	 *            is the preferred locale of the output.
+	 * @param os
+	 *            is the current specification for this job.
+	 * @param connectionSequenceNumber
+	 *            is the unique number of this connection within the job;
+	 *            exposed to the template as SEQNUM.
+	 */
+	@Override
+	public void viewSpecification(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber)
+			throws ManifoldCFException, IOException {
+		final Map<String, Object> velocityContext = new HashMap<String, Object>();
+		velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+		fillInFieldMappingSpecificationMap(velocityContext, os);
+
+		Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, velocityContext);
+	}
+
+	protected static void fillInFieldMappingSpecificationMap(Map<String, Object> paramMap, Specification os) {
+		String sModelPath = "";
+		String tModelPath = "";
+		String pModelPath = "";
+		String lModelPath = "";
+		String oModelPath = "";
+
+		for (int i = 0; i < os.getChildCount(); i++) {
+			SpecificationNode sn = os.getChild(i);
+			if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+				sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				if (sModelPath == null) {
+					sModelPath = "";
+				}
+			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+				tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				if (tModelPath == null) {
+					tModelPath = "";
+				}
+			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+				pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				if (pModelPath == null) {
+					pModelPath = "";
+				}
+			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+				lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				if (lModelPath == null) {
+					lModelPath = "";
+				}
+			} else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+				oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				if (oModelPath == null) {
+					oModelPath = "";
+				}
+			}
+
+		}
+		paramMap.put("SMODELPATH", sModelPath);
+		paramMap.put("TMODELPATH", tModelPath);
+		paramMap.put("PMODELPATH", pModelPath);
+		paramMap.put("LMODELPATH", lModelPath);
+		paramMap.put("OMODELPATH", oModelPath);
+	}
+
+	/**
+	 * Immutable snapshot of the five model paths stored in a pipeline
+	 * specification, plus the packed string form used as the pipeline version
+	 * string. A path is null when the specification has no node of the
+	 * corresponding type (or the node has no value attribute); when several
+	 * nodes of one type exist, the last one wins.
+	 */
+	protected static class SpecPacker {
+
+		private final String sModelPath;
+		private final String tModelPath;
+		private final String pModelPath;
+		private final String lModelPath;
+		private final String oModelPath;
+
+		/**
+		 * Read the model paths out of the specification.
+		 *
+		 * @param os
+		 *            is the pipeline specification to scan.
+		 */
+		public SpecPacker(Specification os) {
+			String sModelPath = null;
+			String tModelPath = null;
+			String pModelPath = null;
+			String lModelPath = null;
+			String oModelPath = null;
+
+			for (int i = 0; i < os.getChildCount(); i++) {
+				SpecificationNode sn = os.getChild(i);
+
+				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+					sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				}
+				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+					tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				}
+				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+					pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				}
+				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+					lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				}
+				if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+					oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+				}
+
+			}
+			this.sModelPath = sModelPath;
+			this.tModelPath = tModelPath;
+			this.pModelPath = pModelPath;
+			this.lModelPath = lModelPath;
+			this.oModelPath = oModelPath;
+		}
+
+		/**
+		 * Pack the five paths into one string in the fixed order sentence,
+		 * tokenizer, people, locations, organizations. Each path is written as
+		 * its length, a colon and the path itself; a missing path is written
+		 * as "-". The length prefix keeps the result unambiguous: plain
+		 * concatenation would map different configurations (for example
+		 * "a","bc" and "ab","c") to the same version string, and the framework
+		 * would then fail to notice the configuration change.
+		 *
+		 * @return the packed version string.
+		 */
+		public String toPackedString() {
+			StringBuilder sb = new StringBuilder();
+
+			packPath(sb, sModelPath);
+			packPath(sb, tModelPath);
+			packPath(sb, pModelPath);
+			packPath(sb, lModelPath);
+			packPath(sb, oModelPath);
+
+			return sb.toString();
+		}
+
+		/** Append one path as length ':' value, or "-" when it is null. */
+		private static void packPath(StringBuilder sb, String path) {
+			if (path == null) {
+				sb.append('-');
+			} else {
+				sb.append(path.length()).append(':').append(path);
+			}
+		}
+
+		/** @return the sentence-detector model path, or null if not configured. */
+		public String getSModelPath() {
+			return sModelPath;
+		}
+
+		/** @return the tokenizer model path, or null if not configured. */
+		public String getTModelPath() {
+			return tModelPath;
+		}
+
+		/** @return the people name-finder model path, or null if not configured. */
+		public String getPModelPath() {
+			return pModelPath;
+		}
+
+		/** @return the location name-finder model path, or null if not configured. */
+		public String getLModelPath() {
+			return lModelPath;
+		}
+
+		/** @return the organization name-finder model path, or null if not configured. */
+		public String getOModelPath() {
+			return oModelPath;
+		}
+
+	}
+
+}

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java Tue Jan 26 15:47:12 2016
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
+
+public class OpenNlpExtractorConfig
+{
+	private static enum MODEL{
+		SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
+	}
+	
+	// Specification nodes and values
+    public static final String NODE_SMODEL_PATH = "SModelPath";
+    public static final String NODE_TMODEL_PATH = "TModelPath";
+    public static final String NODE_PMODEL_PATH = "PModelPath";
+    public static final String NODE_LMODEL_PATH = "LModelPath";
+    public static final String NODE_OMODEL_PATH = "OModelPath";
+
+    public static final String ATTRIBUTE_VALUE = "value";
+    
+    private static SentenceModel sModel = null;
+    private static TokenizerModel tModel = null;
+    private static TokenNameFinderModel pModel = null;
+    private static TokenNameFinderModel lModel = null;
+    private static TokenNameFinderModel oModel = null;
+    
+    private static synchronized void initializeModel(MODEL m, String path) throws InvalidFormatException, FileNotFoundException, IOException{
+    	if(sModel == null && m == MODEL.SENTENCE)
+    		sModel = new SentenceModel(new FileInputStream(path));
+    	if(tModel == null && m == MODEL.TOKENIZER)
+    		tModel = new TokenizerModel(new FileInputStream(path));
+    	if(pModel == null && m == MODEL.PEOPLE)
+    		pModel = new TokenNameFinderModel(new FileInputStream(path));
+    	if(lModel == null && m == MODEL.LOCATIONS)
+    		lModel = new TokenNameFinderModel(new FileInputStream(path));
+    	if(oModel == null && m == MODEL.ORGANIZATIONS)
+    		oModel = new TokenNameFinderModel(new FileInputStream(path));
+    }
+    
+    public static final SentenceDetector sentenceDetector(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+    	if(sModel == null)
+    		initializeModel(MODEL.SENTENCE, path);
+        return new SentenceDetectorME(sModel);
+    }
+    
+    public static final Tokenizer tokenizer(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+    	if(tModel == null)
+    		initializeModel(MODEL.TOKENIZER, path);
+        return new TokenizerME(tModel);
+    }
+    
+    public static final NameFinderME peopleFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+    	if(pModel == null)
+    		initializeModel(MODEL.PEOPLE, path);
+        return new NameFinderME(pModel);
+    }
+    
+    public static final NameFinderME locationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+    	if(lModel == null)
+    		initializeModel(MODEL.LOCATIONS, path);
+        return new NameFinderME(lModel);
+    }
+    
+    public static final NameFinderME organizationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+    	if(oModel == null)
+    		initializeModel(MODEL.ORGANIZATIONS, path);
+        return new NameFinderME(oModel);
+    }
+
+
+    
+
+}

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Tab name and field labels used by the OpenNlp extractor specification templates.
+# NOTE: the last key is misspelled ("Oraganizations"). The HTML templates look it
+# up under exactly this spelling, so the key and its users must be renamed together.
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Tab name and field labels used by the OpenNlp extractor specification templates.
+# NOTE(review): the values below are still English text -- Japanese translation pending?
+# NOTE: the last key is misspelled ("Oraganizations"). The HTML templates look it
+# up under exactly this spelling, so the key and its users must be renamed together.
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Tab name and field labels used by the OpenNlp extractor specification templates.
+# NOTE(review): the values below are still English text -- Chinese translation pending?
+# NOTE: the last key is misspelled ("Oraganizations"). The HTML templates look it
+# up under exactly this spelling, so the key and its users must be renamed together.
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js Tue Jan 26 15:47:12 2016
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+// Specification check for this sequence number's form fields.
+// Always reports success: no client-side validation of the model
+// path inputs is performed here.
+function s${SEQNUM}_checkSpecification()
+{
+  return true;
+}
+//-->
+</script>

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html Tue Jan 26 15:47:12 2016
@@ -0,0 +1,72 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+## Shows the five model path text inputs when this tab is the selected one for
+## the current sequence number; otherwise carries the values as hidden fields.
+#if($TABNAME ==
+$ResourceBundle.getString('OpenNlpExtractor.FieldMappingTabName') &&
+${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.SentenceDetectorModelPath'))</nobr></td>
+		<td class="value"><input type="text" name="s${SEQNUM}_smodelpath"
+			size="128" value="$Encoder.attributeEscape($SMODELPATH)" /></td>
+	</tr>
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.TokenizerModelPath'))</nobr></td>
+		<td class="value"><input type="text" name="s${SEQNUM}_tmodelpath"
+			size="128" value="$Encoder.attributeEscape($TMODELPATH)" /></td>
+	</tr>
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.PeopleModelPath'))</nobr></td>
+		<td class="value"><input type="text" name="s${SEQNUM}_pmodelpath"
+			size="128" value="$Encoder.attributeEscape($PMODELPATH)" /></td>
+	</tr>
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.LocationsModelPath'))</nobr></td>
+		<td class="value"><input type="text" name="s${SEQNUM}_lmodelpath"
+			size="128" value="$Encoder.attributeEscape($LMODELPATH)" /></td>
+	</tr>
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.OraganizationsModelPath'))</nobr></td>
+		<td class="value"><input type="text" name="s${SEQNUM}_omodelpath"
+			size="128" value="$Encoder.attributeEscape($OMODELPATH)" /></td>
+	</tr>
+
+</table>
+
+#else
+## The values land inside HTML attributes, so they are attribute-escaped,
+## exactly like the visible text inputs in the branch above.
+<input type="hidden" name="s${SEQNUM}_smodelpath" value="$Encoder.attributeEscape($SMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_tmodelpath" value="$Encoder.attributeEscape($TMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_pmodelpath" value="$Encoder.attributeEscape($PMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_lmodelpath" value="$Encoder.attributeEscape($LMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_omodelpath" value="$Encoder.attributeEscape($OMODELPATH)"/>
+#end

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html Tue Jan 26 15:47:12 2016
@@ -0,0 +1,58 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Read-only summary of the five configured model paths, one labelled row per path. -->
+<table class="displaytable">
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.SentenceDetectorModelPath'))</nobr></td>
+		<td class="value"><nobr>$Encoder.bodyEscape($SMODELPATH)</nobr></td>
+	</tr>
+
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.TokenizerModelPath'))</nobr></td>
+		<td class="value"><nobr>$Encoder.bodyEscape($TMODELPATH)</nobr></td>
+	</tr>
+
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.PeopleModelPath'))</nobr></td>
+		<td class="value"><nobr>$Encoder.bodyEscape($PMODELPATH)</nobr></td>
+	</tr>
+	
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.LocationsModelPath'))</nobr></td>
+		<td class="value"><nobr>$Encoder.bodyEscape($LMODELPATH)</nobr></td>
+	</tr>
+	
+	<tr>
+		<td class="separator" colspan="2"><hr /></td>
+	</tr>
+	<tr>
+		<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.OraganizationsModelPath'))</nobr></td>
+		<td class="value"><nobr>$Encoder.bodyEscape($OMODELPATH)</nobr></td>
+	</tr>
+
+
+</table>

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh Tue Jan 26 15:47:12 2016
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+MODELS_DIR=nlpmodels
+
+if [ ! -d "$MODELS_DIR" ]; then
+  echo “$MODELS_DIR does not exist…”
+  echo “creating $MODELS_DIR …”
+  mkdir -p ${MODELS_DIR}
+fi
+
+echo “downloading models…”
+wget -O ${MODELS_DIR}/en-sent.bin http://opennlp.sourceforge.net/models-1.5/en-sent.bin
+wget -O ${MODELS_DIR}/en-token.bin http://opennlp.sourceforge.net/models-1.5/en-token.bin
+wget -O ${MODELS_DIR}/en-ner-person.bin http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin
+wget -O ${MODELS_DIR}/en-ner-location.bin http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin
+wget -O ${MODELS_DIR}/en-ner-organization.bin http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin
+echo “downloading finished…”
\ No newline at end of file

Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml Tue Jan 26 15:47:12 2016
@@ -0,0 +1,279 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<parent>
+		<groupId>org.apache.manifoldcf</groupId>
+		<artifactId>mcf-connectors</artifactId>
+		<version>2.4-SNAPSHOT</version>
+	</parent>
+	<modelVersion>4.0.0</modelVersion>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+		<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+	</properties>
+
+	<artifactId>mcf-opennlp-connector</artifactId>
+	<name>ManifoldCF - Connectors - OpenNlp Extractor</name>
+
+	<build>
+		<defaultGoal>integration-test</defaultGoal>
+		<sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+		<testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+		<resources>
+			<resource>
+				<directory>${basedir}/connector/src/main/native2ascii</directory>
+				<includes>
+					<include>**/*.properties</include>
+				</includes>
+			</resource>
+			<resource>
+				<directory>${basedir}/connector/src/main/resources</directory>
+				<includes>
+					<include>**/*.html</include>
+					<include>**/*.js</include>
+				</includes>
+			</resource>
+		</resources>
+		<testResources>
+			<testResource>
+				<directory>${basedir}/connector/src/test/resources</directory>
+			</testResource>
+		</testResources>
+
+		<plugins>
+
+			<!-- Runs the native2ascii goal over every *.properties file in
+				target/classes, reading the files as UTF8. -->
+			<plugin>
+				<groupId>org.codehaus.mojo</groupId>
+				<artifactId>native2ascii-maven-plugin</artifactId>
+				<version>1.0-beta-1</version>
+				<configuration>
+					<workDir>target/classes</workDir>
+				</configuration>
+				<executions>
+					<execution>
+						<id>native2ascii-utf8</id>
+						<goals>
+							<goal>native2ascii</goal>
+						</goals>
+						<configuration>
+							<encoding>UTF8</encoding>
+							<includes>
+								<include>**/*.properties</include>
+							</includes>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+
+			<!-- Builds an additional jar-with-dependencies artifact during the
+				package phase. -->
+			<plugin>
+				<artifactId>maven-assembly-plugin</artifactId>
+				<configuration>
+					<descriptorRefs>
+						<descriptorRef>jar-with-dependencies</descriptorRef>
+					</descriptorRefs>
+				</configuration>
+				<executions>
+					<execution>
+						<id>make-assembly</id> <!-- this is used for inheritance merges -->
+						<phase>package</phase> <!-- bind to the packaging phase -->
+						<goals>
+							<goal>single</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+
+
+			<!-- Test plugin configuration -->
+			<!-- Copies the three ManifoldCF web applications (api-service,
+				authority-service, crawler-ui) into target/dependency under fixed
+				file names; the failsafe configuration in this POM points at
+				those files. -->
+			<plugin>
+				<artifactId>maven-dependency-plugin</artifactId>
+				<executions>
+					<execution>
+						<id>copy-war</id>
+						<phase>generate-resources</phase>
+						<goals>
+							<goal>copy</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>target/dependency</outputDirectory>
+							<artifactItems>
+								<artifactItem>
+									<groupId>${project.groupId}</groupId>
+									<artifactId>mcf-api-service</artifactId>
+									<version>${project.version}</version>
+									<type>war</type>
+									<overWrite>false</overWrite>
+									<destFileName>mcf-api-service.war</destFileName>
+								</artifactItem>
+								<artifactItem>
+									<groupId>${project.groupId}</groupId>
+									<artifactId>mcf-authority-service</artifactId>
+									<version>${project.version}</version>
+									<type>war</type>
+									<overWrite>false</overWrite>
+									<destFileName>mcf-authority-service.war</destFileName>
+								</artifactItem>
+								<artifactItem>
+									<groupId>${project.groupId}</groupId>
+									<artifactId>mcf-crawler-ui</artifactId>
+									<version>${project.version}</version>
+									<type>war</type>
+									<overWrite>false</overWrite>
+									<destFileName>mcf-crawler-ui.war</destFileName>
+								</artifactItem>
+							</artifactItems>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+
+			<!-- Unit tests: skips the Postgresql and MySQL test classes, forks a
+				JVM for every test class and runs it from target/test-output. -->
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<configuration>
+					<excludes>
+						<exclude>**/*Postgresql*.java</exclude>
+						<exclude>**/*MySQL*.java</exclude>
+					</excludes>
+					<forkMode>always</forkMode>
+					<workingDirectory>target/test-output</workingDirectory>
+				</configuration>
+			</plugin>
+
+			<!-- Integration tests: can be switched off through the skipITs
+				property. The war locations are handed to the tests as system
+				properties; the paths are relative to the working directory
+				target/test-output, hence the leading "../". -->
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-failsafe-plugin</artifactId>
+				<version>2.12.3</version>
+				<configuration>
+					<skipTests>${skipITs}</skipTests>
+					<systemPropertyVariables>
+						<crawlerWarPath>../dependency/mcf-crawler-ui.war</crawlerWarPath>
+						<authorityserviceWarPath>../dependency/mcf-authority-service.war</authorityserviceWarPath>
+						<apiWarPath>../dependency/mcf-api-service.war</apiWarPath>
+					</systemPropertyVariables>
+					<excludes>
+						<exclude>**/*Postgresql*.java</exclude>
+						<exclude>**/*MySQL*.java</exclude>
+					</excludes>
+					<forkMode>always</forkMode>
+					<workingDirectory>target/test-output</workingDirectory>
+				</configuration>
+				<executions>
+					<execution>
+						<id>integration-test</id>
+						<goals>
+							<goal>integration-test</goal>
+						</goals>
+					</execution>
+					<execution>
+						<id>verify</id>
+						<goals>
+							<goal>verify</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+
+		</plugins>
+
+		<pluginManagement>
+			<plugins>
+				<!-- This plugin's configuration is used to store Eclipse m2e settings
+					only. It has no influence on the Maven build itself. It tells m2e
+					to ignore three plugin goals: maven-dependency-plugin "copy",
+					native2ascii-maven-plugin "native2ascii" and
+					maven-remote-resources-plugin "process". -->
+				<plugin>
+					<groupId>org.eclipse.m2e</groupId>
+					<artifactId>lifecycle-mapping</artifactId>
+					<version>1.0.0</version>
+					<configuration>
+						<lifecycleMappingMetadata>
+							<pluginExecutions>
+								<pluginExecution>
+									<pluginExecutionFilter>
+										<groupId>
+											org.apache.maven.plugins
+										</groupId>
+										<artifactId>
+											maven-dependency-plugin
+										</artifactId>
+										<versionRange>[2.1,)</versionRange>
+										<goals>
+											<goal>copy</goal>
+										</goals>
+									</pluginExecutionFilter>
+									<action>
+										<ignore></ignore>
+									</action>
+								</pluginExecution>
+								<pluginExecution>
+									<pluginExecutionFilter>
+										<groupId>org.codehaus.mojo</groupId>
+										<artifactId>
+											native2ascii-maven-plugin
+										</artifactId>
+										<versionRange>
+											[1.0-beta-1,)
+										</versionRange>
+										<goals>
+											<goal>native2ascii</goal>
+										</goals>
+									</pluginExecutionFilter>
+									<action>
+										<ignore></ignore>
+									</action>
+								</pluginExecution>
+								<pluginExecution>
+									<pluginExecutionFilter>
+										<groupId>
+											org.apache.maven.plugins
+										</groupId>
+										<artifactId>
+											maven-remote-resources-plugin
+										</artifactId>
+										<versionRange>[1.1,)</versionRange>
+										<goals>
+											<goal>process</goal>
+										</goals>
+									</pluginExecutionFilter>
+									<action>
+										<ignore></ignore>
+									</action>
+								</pluginExecution>
+							</pluginExecutions>
+						</lifecycleMappingMetadata>
+					</configuration>
+				</plugin>
+			</plugins>
+		</pluginManagement>
+	</build>
+
+	<!-- ManifoldCF modules are taken at this project's own version; OpenNLP is
+		the only dependency pinned to its own version. -->
+	<dependencies>
+		<dependency>
+			<groupId>${project.groupId}</groupId>
+			<artifactId>mcf-core</artifactId>
+			<version>${project.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>${project.groupId}</groupId>
+			<artifactId>mcf-connector-common</artifactId>
+			<version>${project.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>${project.groupId}</groupId>
+			<artifactId>mcf-agents</artifactId>
+			<version>${project.version}</version>
+		</dependency>
+		<dependency>
+			<groupId>${project.groupId}</groupId>
+			<artifactId>mcf-ui-core</artifactId>
+			<version>${project.version}</version>
+		</dependency>
+
+		<!-- Provides the opennlp.tools.* model and detector classes. -->
+		<dependency>
+			<groupId>org.apache.opennlp</groupId>
+			<artifactId>opennlp-tools</artifactId>
+			<version>1.6.0</version>
+		</dependency>
+		
+	</dependencies>
+
+</project>
\ No newline at end of file

Modified: manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml?rev=1726831&r1=1726830&r2=1726831&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml Tue Jan 26 15:47:12 2016
@@ -68,6 +68,7 @@
     <module>confluence</module>
     <module>amazons3</module>
     <module>kafka</module>
+    <module>opennlp</module>
   </modules>
 
 </project>