You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by rh...@apache.org on 2016/01/26 16:47:13 UTC
svn commit: r1726831 - in /manifoldcf/branches/CONNECTORS-1270/connectors:
./ opennlp/ opennlp/connector/ opennlp/connector/src/
opennlp/connector/src/main/ opennlp/connector/src/main/java/
opennlp/connector/src/main/java/org/ opennlp/connector/src/mai...
Author: rharo
Date: Tue Jan 26 15:47:12 2016
New Revision: 1726831
URL: http://svn.apache.org/viewvc?rev=1726831&view=rev
Log:
OpenNLP Transformation Connector Initial Import
Added:
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml
Modified:
manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/README.md Tue Jan 26 15:47:12 2016
@@ -0,0 +1,42 @@
+# OpenNLP Transformation Connector for Apache ManifoldCF
+
+The OpenNLP connector extracts named entities (people, locations and organizations) from document content and attaches them as metadata (ner_people, ner_locations and ner_organizations) to the repository document.
+
+
+## Building the Connector
+---
+
+```
+git clone https://github.com/apache/manifoldcf.git
+cd manifoldcf/
+git checkout release-2.2-branch
+mvn clean install
+
+git clone https://github.com/ChalithaUdara/OpenNLP-Manifold-Connector.git
+cd OpenNLP-Manifold-Connector
+mvn clean install -DskipTests=true
+```
+
+## Configure Connector with ManifoldCF
+---
+
+Copy mcf-opennlp-connector-2.2-jar-with-dependencies.jar to **$MANIFOLD_DIR/connectors-lib**.
+To register the connector with ManifoldCF, add the following to the **$MANIFOLD_DIR/connectors.xml** file.
+
+```
+<transformationconnector name="OpenNLP Extractor" class="org.apache.manifoldcf.agents.transformation.opennlp.OpenNlpExtractor" />
+```
+---
+
+In order to extract named entities with OpenNLP, you first need to download the required OpenNLP models. Run the **download-models.sh** script to download them.
+
+```
+sh download-models.sh
+```
+
+This will download the models to the `nlpmodels` directory.
+
+In the ManifoldCF job configuration, you need to configure the path to each of the corresponding models.
+
+
+
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java Tue Jan 26 15:47:12 2016
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+ public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
+ public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
+
+ /** Constructor - do no instantiate
+ */
+ protected Messages()
+ {
+ }
+
+ public static String getString(Locale locale, String messageKey)
+ {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getAttributeString(Locale locale, String messageKey)
+ {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getBodyString(Locale locale, String messageKey)
+ {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getAttributeJavascriptString(Locale locale, String messageKey)
+ {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getBodyJavascriptString(Locale locale, String messageKey)
+ {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getString(Locale locale, String messageKey, Object[] args)
+ {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getBodyString(Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ // More general methods which allow bundlenames and class loaders to be specified.
+
+ public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ // Resource output
+
+ public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,String> substitutionParameters, boolean mapToUpperCase)
+ throws ManifoldCFException
+ {
+ outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+ substitutionParameters,mapToUpperCase);
+ }
+
+ public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,String> substitutionParameters, boolean mapToUpperCase)
+ throws ManifoldCFException
+ {
+ outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+ substitutionParameters,mapToUpperCase);
+ }
+
+ public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,Object> contextObjects)
+ throws ManifoldCFException
+ {
+ outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+ contextObjects);
+ }
+
+}
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java Tue Jan 26 15:47:12 2016
@@ -0,0 +1,513 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.util.Span;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.agents.system.Logging;
+import org.apache.manifoldcf.agents.transformation.BaseTransformationConnector;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.IPostParameters;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.Specification;
+import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
+
+public class OpenNlpExtractor extends BaseTransformationConnector {
+ private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+ private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
+ private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+ // Meta-data fields added by this connector
+ private static final String PERSONS = "ner_people";
+ private static final String LOCATIONS = "ner_locations";
+ private static final String ORGANIZATIONS = "ner_organizations";
+
+ protected static final String ACTIVITY_EXTRACT = "extract";
+
+ protected static final String[] activitiesList = new String[] { ACTIVITY_EXTRACT };
+
+ /**
+  * Report the activity names this connector can generate. No connection is
+  * required before calling this method.
+  *
+  * @return the shared activities array (currently only {@code ACTIVITY_EXTRACT}).
+  */
+ @Override
+ public String[] getActivitiesList() {
+   return OpenNlpExtractor.activitiesList;
+ }
+
+ /**
+  * Build the version context for a pipeline specification. The version
+  * string describes the specification so that the framework can decide
+  * whether a document needs to be processed again; document contents are
+  * never considered here, only the specification.
+  *
+  * This method presumes that the underlying connector object has been
+  * configured.
+  *
+  * @param os
+  *            is the current pipeline specification object for this
+  *            connection for the job that is doing the crawling.
+  * @return a version context whose version string is the packed form of
+  *         the specification; if two such strings are equal, nothing that
+  *         affects how or whether the document is indexed should differ.
+  */
+ @Override
+ public VersionContext getPipelineDescription(Specification os) throws ManifoldCFException, ServiceInterruption {
+   final String versionString = new SpecPacker(os).toPackedString();
+   return new VersionContext(versionString, params, os);
+ }
+
+ /**
+  * Run OpenNLP named-entity extraction over a document and send an enriched
+  * copy to the next stage of the pipeline.
+  * <p>
+  * The document's binary stream is read completely into memory and decoded
+  * as UTF-8 text (this connector assumes a text extractor such as Tika ran
+  * earlier in the pipeline). The text is split into sentences, each sentence
+  * is tokenized, and the people, location and organization name finders are
+  * run over the tokens. The entities found are attached to a duplicate of
+  * the document as the fields {@code ner_people}, {@code ner_locations} and
+  * {@code ner_organizations}. A document with no content is passed on
+  * without those fields and without loading any model.
+  *
+  * @param documentURI
+  *            is the URI of the document; it is passed on unchanged to the
+  *            next pipeline stage.
+  * @param pipelineDescription
+  *            is the version context built by getPipelineDescription(); its
+  *            specification supplies the model paths.
+  * @param document
+  *            is the document data to be processed.
+  * @param authorityNameString
+  *            is the name of the authority responsible for authorizing any
+  *            access tokens passed in with the repository document. May be
+  *            null. Not used by this connector.
+  * @param activities
+  *            is the handle used to send the modified document to the next
+  *            stage in the pipeline.
+  * @return the document status returned by the next pipeline stage.
+  * @throws IOException
+  *             only if there's a stream error reading the document data.
+  */
+ @Override
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
+     RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+     throws ManifoldCFException, ServiceInterruption, IOException {
+   // assumes use of Tika extractor before using this connector
+   Logging.agents.debug("Starting OpenNlp extraction");
+
+   SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
+
+   // The stream can only be consumed once, so buffer it; the same bytes feed
+   // both the extraction below and the copy that is sent downstream.
+   byte[] bytes = IOUtils.toByteArray(document.getBinaryStream());
+
+   RepositoryDocument docCopy = document.duplicate();
+   docCopy.setBinary(new ByteArrayInputStream(bytes), bytes.length);
+
+   // Decide on the bytes actually read rather than on the declared length.
+   if (bytes.length > 0) {
+     SentenceDetector sentenceDetector = OpenNlpExtractorConfig.sentenceDetector(sp.getSModelPath());
+     Tokenizer tokenizer = OpenNlpExtractorConfig.tokenizer(sp.getTModelPath());
+     // NOTE(review): OpenNLP name finders are stateful; confirm that
+     // OpenNlpExtractorConfig never hands the same instance to two worker threads.
+     NameFinderME peopleFinder = OpenNlpExtractorConfig.peopleFinder(sp.getPModelPath());
+     NameFinderME locationFinder = OpenNlpExtractorConfig.locationFinder(sp.getLModelPath());
+     NameFinderME organizationFinder = OpenNlpExtractorConfig.organizationFinder(sp.getOModelPath());
+
+     List<String> peopleList = new ArrayList<>();
+     List<String> locationsList = new ArrayList<>();
+     List<String> organizationsList = new ArrayList<>();
+
+     try {
+       String textContent = new String(bytes, StandardCharsets.UTF_8);
+       for (String sentence : sentenceDetector.sentDetect(textContent)) {
+         String[] tokens = tokenizer.tokenize(sentence);
+         collectEntities(peopleFinder, tokens, peopleList);
+         collectEntities(locationFinder, tokens, locationsList);
+         collectEntities(organizationFinder, tokens, organizationsList);
+       }
+     } finally {
+       // Name finders adapt to the names already seen; that state must be
+       // dropped after each document so it cannot influence the next one.
+       peopleFinder.clearAdaptiveData();
+       locationFinder.clearAdaptiveData();
+       organizationFinder.clearAdaptiveData();
+     }
+
+     // add named entity meta-data (possibly empty arrays, as before)
+     docCopy.addField(PERSONS, peopleList.toArray(new String[peopleList.size()]));
+     docCopy.addField(LOCATIONS, locationsList.toArray(new String[locationsList.size()]));
+     docCopy.addField(ORGANIZATIONS, organizationsList.toArray(new String[organizationsList.size()]));
+   }
+
+   return activities.sendDocument(documentURI, docCopy);
+ }
+
+ /** Run one name finder over a tokenized sentence and append every entity it finds to the sink. */
+ private static void collectEntities(NameFinderME finder, String[] tokens, List<String> sink) {
+   Span[] spans = finder.find(tokens);
+   sink.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
+ }
+
+ // ////////////////////////
+ // UI Methods
+ // ////////////////////////
+
+ /**
+ * Obtain the name of the form check javascript method to call.
+ *
+ * @param connectionSequenceNumber
+ * is the unique number of this connection within the job.
+ * @return the name of the form check javascript method.
+ */
+ @Override
+ public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
+ return "s" + connectionSequenceNumber + "_checkSpecification";
+ }
+
+ /**
+ * Obtain the name of the form presave check javascript method to call.
+ *
+ * @param connectionSequenceNumber
+ * is the unique number of this connection within the job.
+ * @return the name of the form presave check javascript method.
+ */
+ @Override
+ public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
+ return "s" + connectionSequenceNumber + "_checkSpecificationForSave";
+ }
+
+ /**
+  * Output the specification header section of the job editing page. Adds
+  * this connector's tab name to the tab list and writes the
+  * {@code editSpecification.js} resource through Velocity.
+  *
+  * @param out
+  *            is the output to which any HTML should be sent.
+  * @param locale
+  *            is the preferred locale of the output.
+  * @param os
+  *            is the current specification for this job (not read here).
+  * @param connectionSequenceNumber
+  *            is the unique number of this connection within the job;
+  *            exposed to the template as {@code SEQNUM}.
+  * @param tabsArray
+  *            is the list of tab names; the connector's own tab name is
+  *            appended to it.
+  */
+ @Override
+ public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+     int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
+   tabsArray.add(Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName"));
+
+   final Map<String, Object> velocityContext = new HashMap<>();
+   velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, velocityContext);
+ }
+
+ /**
+  * Output the specification body section of the job editing page. Writes
+  * the {@code editSpecification_FieldMapping.html} resource through
+  * Velocity, with the tab name, the sequence numbers and the currently
+  * configured model paths in the template context. The name of the
+  * enclosing form is "editjob".
+  *
+  * @param out
+  *            is the output to which any HTML should be sent.
+  * @param locale
+  *            is the preferred locale of the output.
+  * @param os
+  *            is the current specification for this job.
+  * @param connectionSequenceNumber
+  *            is the unique number of this connection within the job.
+  * @param actualSequenceNumber
+  *            is the connection within the job that has currently been
+  *            selected.
+  * @param tabName
+  *            is the current tab name.
+  */
+ @Override
+ public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber,
+     int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+   velocityContext.put("SELECTEDNUM", String.valueOf(actualSequenceNumber));
+   velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+   velocityContext.put("TABNAME", tabName);
+
+   // adds the five model path entries read from the specification
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, velocityContext);
+ }
+
+ /**
+  * Process a specification post. This method is called at the start of job's
+  * edit or view page, whenever there is a possibility that form data for a
+  * connection has been posted. Its purpose is to gather form information and
+  * modify the specification accordingly. The name of the posted form is
+  * "editjob".
+  * <p>
+  * For each of the five model path form fields ({@code smodelpath},
+  * {@code tmodelpath}, {@code pmodelpath}, {@code lmodelpath},
+  * {@code omodelpath}, each prefixed with {@code s<N>_}) that is present in
+  * the post, every existing node of the matching type is removed and a
+  * single node holding the posted value is appended. Fields that are absent
+  * from the post leave the specification untouched, so a call without form
+  * data neither grows the specification nor wipes configured paths.
+  *
+  * @param variableContext
+  *            contains the post data, including binary file-upload
+  *            information.
+  * @param locale
+  *            is the preferred locale of the output.
+  * @param os
+  *            is the current specification for this job; modified in place.
+  * @param connectionSequenceNumber
+  *            is the unique number of this connection within the job.
+  * @return null if all is well, or a string error message if there is an
+  *         error that should prevent saving of the job (and cause a
+  *         redirection to an error page). This implementation always
+  *         returns null.
+  */
+ @Override
+ public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+     int connectionSequenceNumber) throws ManifoldCFException {
+   String seqPrefix = "s" + connectionSequenceNumber + "_";
+
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_SMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "smodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_TMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "tmodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_PMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "pmodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_LMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "lmodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_OMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "omodelpath"));
+
+   return null;
+ }
+
+ /**
+  * Replace all nodes of the given type with one node carrying the posted
+  * value; does nothing when the value was not part of the post (null).
+  */
+ private static void replaceModelPathNode(Specification os, String nodeType, String postedValue) {
+   if (postedValue == null) {
+     return;
+   }
+
+   // Drop stale nodes of this type; the index only advances when nothing was removed.
+   int i = 0;
+   while (i < os.getChildCount()) {
+     if (os.getChild(i).getType().equals(nodeType)) {
+       os.removeChild(i);
+     } else {
+       i++;
+     }
+   }
+
+   SpecificationNode node = new SpecificationNode(nodeType);
+   node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, postedValue);
+   os.addChild(os.getChildCount(), node);
+ }
+
+ /**
+  * View specification. Called in the body section of a job's view page to
+  * present the specification to the user. Writes the
+  * {@code viewSpecification.html} resource through Velocity, with the
+  * sequence number and the configured model paths in the template context.
+  *
+  * @param out
+  *            is the output to which any HTML should be sent.
+  * @param locale
+  *            is the preferred locale of the output.
+  * @param os
+  *            is the current specification for this job.
+  * @param connectionSequenceNumber
+  *            is the unique number of this connection within the job.
+  */
+ @Override
+ public void viewSpecification(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber)
+     throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+   velocityContext.put("SEQNUM", String.valueOf(connectionSequenceNumber));
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, velocityContext);
+ }
+
+ protected static void fillInFieldMappingSpecificationMap(Map<String, Object> paramMap, Specification os) {
+ String sModelPath = "";
+ String tModelPath = "";
+ String pModelPath = "";
+ String lModelPath = "";
+ String oModelPath = "";
+
+ for (int i = 0; i < os.getChildCount(); i++) {
+ SpecificationNode sn = os.getChild(i);
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+ sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (sModelPath == null) {
+ sModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+ tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (tModelPath == null) {
+ tModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+ pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (pModelPath == null) {
+ pModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+ lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (lModelPath == null) {
+ lModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+ oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (oModelPath == null) {
+ oModelPath = "";
+ }
+ }
+
+ }
+ paramMap.put("SMODELPATH", sModelPath);
+ paramMap.put("TMODELPATH", tModelPath);
+ paramMap.put("PMODELPATH", pModelPath);
+ paramMap.put("LMODELPATH", lModelPath);
+ paramMap.put("OMODELPATH", oModelPath);
+ }
+
+ protected static class SpecPacker {
+
+ private final String sModelPath;
+ private final String tModelPath;
+ private final String pModelPath;
+ private final String lModelPath;
+ private final String oModelPath;
+
+ public SpecPacker(Specification os) {
+ String sModelPath = null;
+ String tModelPath = null;
+ String pModelPath = null;
+ String lModelPath = null;
+ String oModelPath = null;
+
+ for (int i = 0; i < os.getChildCount(); i++) {
+ SpecificationNode sn = os.getChild(i);
+
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+ sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ }
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+ tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ }
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+ pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ }
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+ lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ }
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+ oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ }
+
+ }
+ this.sModelPath = sModelPath;
+ this.tModelPath = tModelPath;
+ this.pModelPath = pModelPath;
+ this.lModelPath = lModelPath;
+ this.oModelPath = oModelPath;
+ }
+
+ public String toPackedString() {
+ StringBuilder sb = new StringBuilder();
+
+ // extract nouns
+ if (sModelPath != null)
+ sb.append(sModelPath);
+ if (tModelPath != null)
+ sb.append(tModelPath);
+ if (pModelPath != null)
+ sb.append(pModelPath);
+ if (lModelPath != null)
+ sb.append(lModelPath);
+ if (oModelPath != null)
+ sb.append(oModelPath);
+
+ return sb.toString();
+ }
+
+ public String getSModelPath() {
+ return sModelPath;
+ }
+
+ public String getTModelPath() {
+ return tModelPath;
+ }
+
+ public String getPModelPath() {
+ return pModelPath;
+ }
+
+ public String getLModelPath() {
+ return lModelPath;
+ }
+
+ public String getOModelPath() {
+ return oModelPath;
+ }
+
+ }
+
+}
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java Tue Jan 26 15:47:12 2016
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.manifoldcf.agents.transformation.opennlp;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.sentdetect.SentenceDetector;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
+
+public class OpenNlpExtractorConfig
+{
+ private static enum MODEL{
+ SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
+ }
+
+ // Specification nodes and values
+ public static final String NODE_SMODEL_PATH = "SModelPath";
+ public static final String NODE_TMODEL_PATH = "TModelPath";
+ public static final String NODE_PMODEL_PATH = "PModelPath";
+ public static final String NODE_LMODEL_PATH = "LModelPath";
+ public static final String NODE_OMODEL_PATH = "OModelPath";
+
+ public static final String ATTRIBUTE_VALUE = "value";
+
+ private static SentenceModel sModel = null;
+ private static TokenizerModel tModel = null;
+ private static TokenNameFinderModel pModel = null;
+ private static TokenNameFinderModel lModel = null;
+ private static TokenNameFinderModel oModel = null;
+
+ private static synchronized void initializeModel(MODEL m, String path) throws InvalidFormatException, FileNotFoundException, IOException{
+ if(sModel == null && m == MODEL.SENTENCE)
+ sModel = new SentenceModel(new FileInputStream(path));
+ if(tModel == null && m == MODEL.TOKENIZER)
+ tModel = new TokenizerModel(new FileInputStream(path));
+ if(pModel == null && m == MODEL.PEOPLE)
+ pModel = new TokenNameFinderModel(new FileInputStream(path));
+ if(lModel == null && m == MODEL.LOCATIONS)
+ lModel = new TokenNameFinderModel(new FileInputStream(path));
+ if(oModel == null && m == MODEL.ORGANIZATIONS)
+ oModel = new TokenNameFinderModel(new FileInputStream(path));
+ }
+
+ public static final SentenceDetector sentenceDetector(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+ if(sModel == null)
+ initializeModel(MODEL.SENTENCE, path);
+ return new SentenceDetectorME(sModel);
+ }
+
+ public static final Tokenizer tokenizer(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+ if(tModel == null)
+ initializeModel(MODEL.TOKENIZER, path);
+ return new TokenizerME(tModel);
+ }
+
+ public static final NameFinderME peopleFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+ if(pModel == null)
+ initializeModel(MODEL.PEOPLE, path);
+ return new NameFinderME(pModel);
+ }
+
+ public static final NameFinderME locationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+ if(lModel == null)
+ initializeModel(MODEL.LOCATIONS, path);
+ return new NameFinderME(lModel);
+ }
+
+ public static final NameFinderME organizationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
+ if(oModel == null)
+ initializeModel(MODEL.ORGANIZATIONS, path);
+ return new NameFinderME(oModel);
+ }
+
+
+
+
+}
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_en_US.properties Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_ja_JP.properties Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/opennlp/common_zh_CN.properties Tue Jan 26 15:47:12 2016
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+OpenNlpExtractor.FieldMappingTabName=OpenNlp Extraction
+OpenNlpExtractor.SentenceDetectorModelPath=Sentence Detector Model Path:
+OpenNlpExtractor.TokenizerModelPath=Tokenizer Model Path:
+OpenNlpExtractor.PeopleModelPath=People Model Path:
+OpenNlpExtractor.LocationsModelPath=Locations Model Path:
+OpenNlpExtractor.OraganizationsModelPath=Organizations Model Path:
\ No newline at end of file
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification.js Tue Jan 26 15:47:12 2016
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+// Always returns true: no checks are performed on the model path fields here.
+// NOTE(review): presumably called by the UI before the specification form is
+// saved - confirm against the framework.
+function s${SEQNUM}_checkSpecification()
+{
+ return true;
+}
+//-->
+</script>
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/editSpecification_FieldMapping.html Tue Jan 26 15:47:12 2016
@@ -0,0 +1,72 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+## Editable form, rendered only when this connector's tab is the selected one.
+#if($TABNAME ==
+$ResourceBundle.getString('OpenNlpExtractor.FieldMappingTabName') &&
+${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.SentenceDetectorModelPath'))</nobr></td>
+ <td class="value"><input type="text" name="s${SEQNUM}_smodelpath"
+ size="128" value="$Encoder.attributeEscape($SMODELPATH)" /></td>
+ </tr>
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.TokenizerModelPath'))</nobr></td>
+ <td class="value"><input type="text" name="s${SEQNUM}_tmodelpath"
+ size="128" value="$Encoder.attributeEscape($TMODELPATH)" /></td>
+ </tr>
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.PeopleModelPath'))</nobr></td>
+ <td class="value"><input type="text" name="s${SEQNUM}_pmodelpath"
+ size="128" value="$Encoder.attributeEscape($PMODELPATH)" /></td>
+ </tr>
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.LocationsModelPath'))</nobr></td>
+ <td class="value"><input type="text" name="s${SEQNUM}_lmodelpath"
+ size="128" value="$Encoder.attributeEscape($LMODELPATH)" /></td>
+ </tr>
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.OraganizationsModelPath'))</nobr></td>
+ <td class="value"><input type="text" name="s${SEQNUM}_omodelpath"
+ size="128" value="$Encoder.attributeEscape($OMODELPATH)" /></td>
+ </tr>
+
+</table>
+
+#else
+## Tab not selected: emit the same field names as hidden inputs. The values
+## sit inside HTML attributes, so they use attributeEscape like the visible
+## inputs above rather than bodyEscape.
+<input type="hidden" name="s${SEQNUM}_smodelpath" value="$Encoder.attributeEscape($SMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_tmodelpath" value="$Encoder.attributeEscape($TMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_pmodelpath" value="$Encoder.attributeEscape($PMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_lmodelpath" value="$Encoder.attributeEscape($LMODELPATH)"/>
+<input type="hidden" name="s${SEQNUM}_omodelpath" value="$Encoder.attributeEscape($OMODELPATH)"/>
+#end
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/opennlp/viewSpecification.html Tue Jan 26 15:47:12 2016
@@ -0,0 +1,58 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<table class="displaytable">
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.SentenceDetectorModelPath'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($SMODELPATH)</nobr></td>
+ </tr>
+
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.TokenizerModelPath'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($TMODELPATH)</nobr></td>
+ </tr>
+
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.PeopleModelPath'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($PMODELPATH)</nobr></td>
+ </tr>
+
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.LocationsModelPath'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($LMODELPATH)</nobr></td>
+ </tr>
+
+ <tr>
+ <td class="separator" colspan="2"><hr /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenNlpExtractor.OraganizationsModelPath'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($OMODELPATH)</nobr></td>
+ </tr>
+
+
+</table>
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/download-models.sh Tue Jan 26 15:47:12 2016
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+MODELS_DIR=nlpmodels
+
+if [ ! -d "$MODELS_DIR" ]; then
+ echo â$MODELS_DIR does not existâ¦â
+ echo âcreating $MODELS_DIR â¦â
+ mkdir -p ${MODELS_DIR}
+fi
+
+echo âdownloading modelsâ¦â
+wget -O ${MODELS_DIR}/en-sent.bin http://opennlp.sourceforge.net/models-1.5/en-sent.bin
+wget -O ${MODELS_DIR}/en-token.bin http://opennlp.sourceforge.net/models-1.5/en-token.bin
+wget -O ${MODELS_DIR}/en-ner-person.bin http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin
+wget -O ${MODELS_DIR}/en-ner-location.bin http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin
+wget -O ${MODELS_DIR}/en-ner-organization.bin http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin
+echo âdownloading finishedâ¦â
\ No newline at end of file
Added: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml?rev=1726831&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml (added)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/pom.xml Tue Jan 26 15:47:12 2016
@@ -0,0 +1,279 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <groupId>org.apache.manifoldcf</groupId>
+ <artifactId>mcf-connectors</artifactId>
+ <version>2.4-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+ </properties>
+
+ <artifactId>mcf-opennlp-connector</artifactId>
+ <name>ManifoldCF - Connectors - OpenNlp Extractor</name>
+
+ <build>
+ <defaultGoal>integration-test</defaultGoal>
+ <sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+ <testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+ <resources>
+ <resource>
+ <directory>${basedir}/connector/src/main/native2ascii</directory>
+ <includes>
+ <include>**/*.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>${basedir}/connector/src/main/resources</directory>
+ <includes>
+ <include>**/*.html</include>
+ <include>**/*.js</include>
+ </includes>
+ </resource>
+ </resources>
+ <testResources>
+ <testResource>
+ <directory>${basedir}/connector/src/test/resources</directory>
+ </testResource>
+ </testResources>
+
+ <plugins>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>native2ascii-maven-plugin</artifactId>
+ <version>1.0-beta-1</version>
+ <configuration>
+ <workDir>target/classes</workDir>
+ </configuration>
+ <executions>
+ <execution>
+ <id>native2ascii-utf8</id>
+ <goals>
+ <goal>native2ascii</goal>
+ </goals>
+ <configuration>
+ <encoding>UTF8</encoding>
+ <includes>
+ <include>**/*.properties</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ <executions>
+ <execution>
+ <id>make-assembly</id> <!-- this is used for inheritance merges -->
+ <phase>package</phase> <!-- bind to the packaging phase -->
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+
+ <!-- Test plugin configuration -->
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-war</id>
+ <phase>generate-resources</phase>
+ <goals>
+ <goal>copy</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/dependency</outputDirectory>
+ <artifactItems>
+ <artifactItem>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-api-service</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <overWrite>false</overWrite>
+ <destFileName>mcf-api-service.war</destFileName>
+ </artifactItem>
+ <artifactItem>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-authority-service</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <overWrite>false</overWrite>
+ <destFileName>mcf-authority-service.war</destFileName>
+ </artifactItem>
+ <artifactItem>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-crawler-ui</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <overWrite>false</overWrite>
+ <destFileName>mcf-crawler-ui.war</destFileName>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <exclude>**/*Postgresql*.java</exclude>
+ <exclude>**/*MySQL*.java</exclude>
+ </excludes>
+ <forkMode>always</forkMode>
+ <workingDirectory>target/test-output</workingDirectory>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>2.12.3</version>
+ <configuration>
+ <skipTests>${skipITs}</skipTests>
+ <systemPropertyVariables>
+ <crawlerWarPath>../dependency/mcf-crawler-ui.war</crawlerWarPath>
+ <authorityserviceWarPath>../dependency/mcf-authority-service.war</authorityserviceWarPath>
+ <apiWarPath>../dependency/mcf-api-service.war</apiWarPath>
+ </systemPropertyVariables>
+ <excludes>
+ <exclude>**/*Postgresql*.java</exclude>
+ <exclude>**/*MySQL*.java</exclude>
+ </excludes>
+ <forkMode>always</forkMode>
+ <workingDirectory>target/test-output</workingDirectory>
+ </configuration>
+ <executions>
+ <execution>
+ <id>integration-test</id>
+ <goals>
+ <goal>integration-test</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>verify</id>
+ <goals>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+
+ <pluginManagement>
+ <plugins>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself. -->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>
+ org.apache.maven.plugins
+ </groupId>
+ <artifactId>
+ maven-dependency-plugin
+ </artifactId>
+ <versionRange>[2.1,)</versionRange>
+ <goals>
+ <goal>copy</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore></ignore>
+ </action>
+ </pluginExecution>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>
+ native2ascii-maven-plugin
+ </artifactId>
+ <versionRange>
+ [1.0-beta-1,)
+ </versionRange>
+ <goals>
+ <goal>native2ascii</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore></ignore>
+ </action>
+ </pluginExecution>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>
+ org.apache.maven.plugins
+ </groupId>
+ <artifactId>
+ maven-remote-resources-plugin
+ </artifactId>
+ <versionRange>[1.1,)</versionRange>
+ <goals>
+ <goal>process</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <ignore></ignore>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-connector-common</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-agents</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-ui-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-tools</artifactId>
+ <version>1.6.0</version>
+ </dependency>
+
+ </dependencies>
+
+</project>
\ No newline at end of file
Modified: manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml?rev=1726831&r1=1726830&r2=1726831&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/pom.xml Tue Jan 26 15:47:12 2016
@@ -68,6 +68,7 @@
<module>confluence</module>
<module>amazons3</module>
<module>kafka</module>
+ <module>opennlp</module>
</modules>
</project>