You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2016/01/27 01:33:07 UTC
svn commit: r1726929 - in
/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp:
Messages.java OpenNlpExtractor.java OpenNlpExtractorConfig.java
Author: kwright
Date: Wed Jan 27 00:33:06 2016
New Revision: 1726929
URL: http://svn.apache.org/viewvc?rev=1726929&view=rev
Log:
Use standard indents
Modified:
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
Modified: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java?rev=1726929&r1=1726928&r2=1726929&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/Messages.java Wed Jan 27 00:33:06 2016
@@ -24,116 +24,116 @@ import org.apache.manifoldcf.core.interf
public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
{
- public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
- public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
-
- /** Constructor - do no instantiate
- */
- protected Messages()
- {
- }
-
- public static String getString(Locale locale, String messageKey)
- {
- return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
- }
-
- public static String getAttributeString(Locale locale, String messageKey)
- {
- return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
- }
-
- public static String getBodyString(Locale locale, String messageKey)
- {
- return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
- }
-
- public static String getAttributeJavascriptString(Locale locale, String messageKey)
- {
- return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
- }
-
- public static String getBodyJavascriptString(Locale locale, String messageKey)
- {
- return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
- }
-
- public static String getString(Locale locale, String messageKey, Object[] args)
- {
- return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
- }
-
- public static String getAttributeString(Locale locale, String messageKey, Object[] args)
- {
- return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
- }
-
- public static String getBodyString(Locale locale, String messageKey, Object[] args)
- {
- return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
- }
-
- public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
- {
- return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
- }
-
- public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
- {
- return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
- }
-
- // More general methods which allow bundlenames and class loaders to be specified.
-
- public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
- {
- return getString(Messages.class, bundleName, locale, messageKey, args);
- }
-
- public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
- {
- return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
- }
-
- public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
- {
- return getBodyString(Messages.class, bundleName, locale, messageKey, args);
- }
-
- public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
- {
- return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
- }
-
- public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
- {
- return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
- }
-
- // Resource output
-
- public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
- Map<String,String> substitutionParameters, boolean mapToUpperCase)
- throws ManifoldCFException
- {
- outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
- substitutionParameters,mapToUpperCase);
- }
-
- public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
- Map<String,String> substitutionParameters, boolean mapToUpperCase)
- throws ManifoldCFException
- {
- outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
- substitutionParameters,mapToUpperCase);
- }
-
- public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
- Map<String,Object> contextObjects)
- throws ManifoldCFException
- {
- outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
- contextObjects);
- }
+ public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.opennlp.common";
+ public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.opennlp";
+
+ /** Constructor - do no instantiate
+ */
+ protected Messages()
+ {
+ }
+
+ public static String getString(Locale locale, String messageKey)
+ {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getAttributeString(Locale locale, String messageKey)
+ {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getBodyString(Locale locale, String messageKey)
+ {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getAttributeJavascriptString(Locale locale, String messageKey)
+ {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getBodyJavascriptString(Locale locale, String messageKey)
+ {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getString(Locale locale, String messageKey, Object[] args)
+ {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getBodyString(Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ // More general methods which allow bundlenames and class loaders to be specified.
+
+ public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ // Resource output
+
+ public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,String> substitutionParameters, boolean mapToUpperCase)
+ throws ManifoldCFException
+ {
+ outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+ substitutionParameters,mapToUpperCase);
+ }
+
+ public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,String> substitutionParameters, boolean mapToUpperCase)
+ throws ManifoldCFException
+ {
+ outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+ substitutionParameters,mapToUpperCase);
+ }
+
+ public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,Object> contextObjects)
+ throws ManifoldCFException
+ {
+ outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+ contextObjects);
+ }
}
Modified: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java?rev=1726929&r1=1726928&r2=1726929&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractor.java Wed Jan 27 00:33:06 2016
@@ -46,468 +46,468 @@ import org.apache.manifoldcf.core.interf
import org.apache.manifoldcf.core.interfaces.VersionContext;
public class OpenNlpExtractor extends BaseTransformationConnector {
- private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
- private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
- private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
-
- // Meta-data fields added by this connector
- private static final String PERSONS = "ner_people";
- private static final String LOCATIONS = "ner_locations";
- private static final String ORGANIZATIONS = "ner_organizations";
-
- protected static final String ACTIVITY_EXTRACT = "extract";
-
- protected static final String[] activitiesList = new String[] { ACTIVITY_EXTRACT };
-
- /**
- * Return a list of activities that this connector generates. The connector
- * does NOT need to be connected before this method is called.
- *
- * @return the set of activities.
- */
- @Override
- public String[] getActivitiesList() {
- return activitiesList;
- }
-
- /**
- * Get a pipeline version string, given a pipeline specification object. The
- * version string is used to uniquely describe the pertinent details of the
- * specification and the configuration, to allow the Connector Framework to
- * determine whether a document will need to be processed again. Note that
- * the contents of any document cannot be considered by this method; only
- * configuration and specification information can be considered.
- *
- * This method presumes that the underlying connector object has been
- * configured.
- *
- * @param spec
- * is the current pipeline specification object for this
- * connection for the job that is doing the crawling.
- * @return a string, of unlimited length, which uniquely describes
- * configuration and specification in such a way that if two such
- * strings are equal, nothing that affects how or whether the
- * document is indexed will be different.
- */
- @Override
- public VersionContext getPipelineDescription(Specification os) throws ManifoldCFException, ServiceInterruption {
- SpecPacker sp = new SpecPacker(os);
- return new VersionContext(sp.toPackedString(), params, os);
- }
-
- /**
- * Add (or replace) a document in the output data store using the connector.
- * This method presumes that the connector object has been configured, and
- * it is thus able to communicate with the output data store should that be
- * necessary. The OutputSpecification is *not* provided to this method,
- * because the goal is consistency, and if output is done it must be
- * consistent with the output description, since that was what was partly
- * used to determine if output should be taking place. So it may be
- * necessary for this method to decode an output description string in order
- * to determine what should be done.
- *
- * @param documentURI
- * is the URI of the document. The URI is presumed to be the
- * unique identifier which the output data store will use to
- * process and serve the document. This URI is constructed by the
- * repository connector which fetches the document, and is thus
- * universal across all output connectors.
- * @param outputDescription
- * is the description string that was constructed for this
- * document by the getOutputDescription() method.
- * @param document
- * is the document data to be processed (handed to the output
- * data store).
- * @param authorityNameString
- * is the name of the authority responsible for authorizing any
- * access tokens passed in with the repository document. May be
- * null.
- * @param activities
- * is the handle to an object that the implementer of a pipeline
- * connector may use to perform operations, such as logging
- * processing activity, or sending a modified document to the
- * next stage in the pipeline.
- * @return the document status (accepted or permanently rejected).
- * @throws IOException
- * only if there's a stream error reading the document data.
- */
- @Override
- public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
- RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
- throws ManifoldCFException, ServiceInterruption, IOException {
- // assumes use of Tika extractor before using this connector
- Logging.agents.debug("Starting OpenNlp extraction");
-
- SpecPacker sp = new SpecPacker(pipelineDescription.getSpecification());
-
- byte[] bytes = IOUtils.toByteArray(document.getBinaryStream());
-
- SentenceDetector sentenceDetector = OpenNlpExtractorConfig.sentenceDetector(sp.getSModelPath());
- Tokenizer tokenizer = OpenNlpExtractorConfig.tokenizer(sp.getTModelPath());
- NameFinderME peopleFinder = OpenNlpExtractorConfig.peopleFinder(sp.getPModelPath());
- NameFinderME locationFinder = OpenNlpExtractorConfig.locationFinder(sp.getLModelPath());
- NameFinderME organizationFinder = OpenNlpExtractorConfig.organizationFinder(sp.getOModelPath());
-
- // create a duplicate
- RepositoryDocument docCopy = document.duplicate();
- Map<String, List<String>> nerMap = new HashMap<>();
-
- if (document.getBinaryLength() > 0) {
- String textContent = new String(bytes, StandardCharsets.UTF_8);
- List<String> peopleList = new ArrayList<>();
- List<String> locationsList = new ArrayList<>();
- List<String> organizationsList = new ArrayList<>();
-
- String[] sentences = sentenceDetector.sentDetect(textContent);
- for (String sentence : sentences) {
- String[] tokens = tokenizer.tokenize(sentence);
-
- Span[] spans = peopleFinder.find(tokens);
- peopleList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
-
- spans = locationFinder.find(tokens);
- locationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
-
- spans = organizationFinder.find(tokens);
- organizationsList.addAll(Arrays.asList(Span.spansToStrings(spans, tokens)));
-
- }
-
- nerMap.put(PERSONS, peopleList);
- nerMap.put(LOCATIONS, locationsList);
- nerMap.put(ORGANIZATIONS, organizationsList);
- }
- // reset original stream
- docCopy.setBinary(new ByteArrayInputStream(bytes), bytes.length);
-
- // add named entity meta-data
- if (!nerMap.isEmpty()) {
- for (Entry<String, List<String>> entry : nerMap.entrySet()) {
- List<String> neList = entry.getValue();
- String[] neArray = neList.toArray(new String[neList.size()]);
- docCopy.addField(entry.getKey(), neArray);
- }
- }
-
- return activities.sendDocument(documentURI, docCopy);
- }
-
- // ////////////////////////
- // UI Methods
- // ////////////////////////
-
- /**
- * Obtain the name of the form check javascript method to call.
- *
- * @param connectionSequenceNumber
- * is the unique number of this connection within the job.
- * @return the name of the form check javascript method.
- */
- @Override
- public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
- return "s" + connectionSequenceNumber + "_checkSpecification";
- }
-
- /**
- * Obtain the name of the form presave check javascript method to call.
- *
- * @param connectionSequenceNumber
- * is the unique number of this connection within the job.
- * @return the name of the form presave check javascript method.
- */
- @Override
- public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
- return "s" + connectionSequenceNumber + "_checkSpecificationForSave";
- }
-
- /**
- * Output the specification header section. This method is called in the
- * head section of a job page which has selected an output connection of the
- * current type. Its purpose is to add the required tabs to the list, and to
- * output any javascript methods that might be needed by the job editing
- * HTML.
- *
- * @param out
- * is the output to which any HTML should be sent.
- * @param locale
- * is the preferred local of the output.
- * @param os
- * is the current output specification for this job.
- * @param connectionSequenceNumber
- * is the unique number of this connection within the job.
- * @param tabsArray
- * is an array of tab names. Add to this array any tab names that
- * are specific to the connector.
- */
- @Override
- public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
- int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
- Map<String, Object> paramMap = new HashMap<String, Object>();
- paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
-
- tabsArray.add(Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName"));
-
- Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, paramMap);
- }
-
- /**
- * Output the specification body section. This method is called in the body
- * section of a job page which has selected an output connection of the
- * current type. Its purpose is to present the required form elements for
- * editing. The coder can presume that the HTML that is output from this
- * configuration will be within appropriate <html>, <body>, and <form> tags.
- * The name of the form is "editjob".
- *
- * @param out
- * is the output to which any HTML should be sent.
- * @param locale
- * is the preferred local of the output.
- * @param os
- * is the current output specification for this job.
- * @param connectionSequenceNumber
- * is the unique number of this connection within the job.
- * @param actualSequenceNumber
- * is the connection within the job that has currently been
- * selected.
- * @param tabName
- * is the current tab name.
- */
- @Override
- public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber,
- int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException {
- Map<String, Object> paramMap = new HashMap<String, Object>();
-
- paramMap.put("TABNAME", tabName);
- paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
- paramMap.put("SELECTEDNUM", Integer.toString(actualSequenceNumber));
-
- fillInFieldMappingSpecificationMap(paramMap, os);
-
- Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, paramMap);
- }
-
- /**
- * Process a specification post. This method is called at the start of job's
- * edit or view page, whenever there is a possibility that form data for a
- * connection has been posted. Its purpose is to gather form information and
- * modify the output specification accordingly. The name of the posted form
- * is "editjob".
- *
- * @param variableContext
- * contains the post data, including binary file-upload
- * information.
- * @param locale
- * is the preferred local of the output.
- * @param os
- * is the current output specification for this job.
- * @param connectionSequenceNumber
- * is the unique number of this connection within the job.
- * @return null if all is well, or a string error message if there is an
- * error that should prevent saving of the job (and cause a
- * redirection to an error page).
- */
- @Override
- public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
- int connectionSequenceNumber) throws ManifoldCFException {
- String seqPrefix = "s" + connectionSequenceNumber + "_";
-
- SpecificationNode node = new SpecificationNode(OpenNlpExtractorConfig.NODE_SMODEL_PATH);
- String smodelPath = variableContext.getParameter(seqPrefix + "smodelpath");
- if (smodelPath != null) {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, smodelPath);
- } else {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
- }
- os.addChild(os.getChildCount(), node);
-
- node = new SpecificationNode(OpenNlpExtractorConfig.NODE_TMODEL_PATH);
- String tmodelPath = variableContext.getParameter(seqPrefix + "tmodelpath");
- if (tmodelPath != null) {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, tmodelPath);
- } else {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
- }
- os.addChild(os.getChildCount(), node);
-
- node = new SpecificationNode(OpenNlpExtractorConfig.NODE_PMODEL_PATH);
- String pmodelPath = variableContext.getParameter(seqPrefix + "pmodelpath");
- if (pmodelPath != null) {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, pmodelPath);
- } else {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
- }
- os.addChild(os.getChildCount(), node);
-
- node = new SpecificationNode(OpenNlpExtractorConfig.NODE_LMODEL_PATH);
- String lmodelPath = variableContext.getParameter(seqPrefix + "lmodelpath");
- if (lmodelPath != null) {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, lmodelPath);
- } else {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
- }
- os.addChild(os.getChildCount(), node);
-
- node = new SpecificationNode(OpenNlpExtractorConfig.NODE_OMODEL_PATH);
- String omodelPath = variableContext.getParameter(seqPrefix + "omodelpath");
- if (omodelPath != null) {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, omodelPath);
- } else {
- node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, "");
- }
- os.addChild(os.getChildCount(), node);
-
- return null;
- }
-
- /**
- * View specification. This method is called in the body section of a job's
- * view page. Its purpose is to present the output specification information
- * to the user. The coder can presume that the HTML that is output from this
- * configuration will be within appropriate <html> and <body> tags.
- *
- * @param out
- * is the output to which any HTML should be sent.
- * @param locale
- * is the preferred local of the output.
- * @param connectionSequenceNumber
- * is the unique number of this connection within the job.
- * @param os
- * is the current output specification for this job.
- */
- @Override
- public void viewSpecification(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber)
- throws ManifoldCFException, IOException {
- Map<String, Object> paramMap = new HashMap<String, Object>();
- paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
-
- fillInFieldMappingSpecificationMap(paramMap, os);
- Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, paramMap);
- }
-
- protected static void fillInFieldMappingSpecificationMap(Map<String, Object> paramMap, Specification os) {
- String sModelPath = "";
- String tModelPath = "";
- String pModelPath = "";
- String lModelPath = "";
- String oModelPath = "";
-
- for (int i = 0; i < os.getChildCount(); i++) {
- SpecificationNode sn = os.getChild(i);
- if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
- sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- if (sModelPath == null) {
- sModelPath = "";
- }
- } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
- tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- if (tModelPath == null) {
- tModelPath = "";
- }
- } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
- pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- if (pModelPath == null) {
- pModelPath = "";
- }
- } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
- lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- if (lModelPath == null) {
- lModelPath = "";
- }
- } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
- oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- if (oModelPath == null) {
- oModelPath = "";
- }
- }
-
- }
- paramMap.put("SMODELPATH", sModelPath);
- paramMap.put("TMODELPATH", tModelPath);
- paramMap.put("PMODELPATH", pModelPath);
- paramMap.put("LMODELPATH", lModelPath);
- paramMap.put("OMODELPATH", oModelPath);
- }
-
- protected static class SpecPacker {
-
- private final String sModelPath;
- private final String tModelPath;
- private final String pModelPath;
- private final String lModelPath;
- private final String oModelPath;
-
- public SpecPacker(Specification os) {
- String sModelPath = null;
- String tModelPath = null;
- String pModelPath = null;
- String lModelPath = null;
- String oModelPath = null;
-
- for (int i = 0; i < os.getChildCount(); i++) {
- SpecificationNode sn = os.getChild(i);
-
- if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
- sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- }
- if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
- tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- }
- if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
- pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- }
- if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
- lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- }
- if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
- oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
- }
-
- }
- this.sModelPath = sModelPath;
- this.tModelPath = tModelPath;
- this.pModelPath = pModelPath;
- this.lModelPath = lModelPath;
- this.oModelPath = oModelPath;
- }
-
- public String toPackedString() {
- StringBuilder sb = new StringBuilder();
-
- // extract nouns
- if (sModelPath != null)
- sb.append(sModelPath);
- if (tModelPath != null)
- sb.append(tModelPath);
- if (pModelPath != null)
- sb.append(pModelPath);
- if (lModelPath != null)
- sb.append(lModelPath);
- if (oModelPath != null)
- sb.append(oModelPath);
-
- return sb.toString();
- }
-
- public String getSModelPath() {
- return sModelPath;
- }
-
- public String getTModelPath() {
- return tModelPath;
- }
-
- public String getPModelPath() {
- return pModelPath;
- }
-
- public String getLModelPath() {
- return lModelPath;
- }
-
- public String getOModelPath() {
- return oModelPath;
- }
+ private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+ private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
+ private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+ // Meta-data fields added by this connector
+ private static final String PERSONS = "ner_people";
+ private static final String LOCATIONS = "ner_locations";
+ private static final String ORGANIZATIONS = "ner_organizations";
+
+ protected static final String ACTIVITY_EXTRACT = "extract";
+
+ protected static final String[] activitiesList = new String[] { ACTIVITY_EXTRACT };
+
+ /**
+ * Return a list of activities that this connector generates. The connector
+ * does NOT need to be connected before this method is called.
+ *
+ * @return the set of activities.
+ */
+ @Override
+ public String[] getActivitiesList() {
+ return activitiesList;
+ }
+
+ /**
+ * Get a pipeline version string, given a pipeline specification object. The
+ * version string is used to uniquely describe the pertinent details of the
+ * specification and the configuration, to allow the Connector Framework to
+ * determine whether a document will need to be processed again. Note that
+ * the contents of any document cannot be considered by this method; only
+ * configuration and specification information can be considered.
+ *
+ * This method presumes that the underlying connector object has been
+ * configured.
+ *
+ * @param spec
+ * is the current pipeline specification object for this
+ * connection for the job that is doing the crawling.
+ * @return a string, of unlimited length, which uniquely describes
+ * configuration and specification in such a way that if two such
+ * strings are equal, nothing that affects how or whether the
+ * document is indexed will be different.
+ */
+ @Override
+ public VersionContext getPipelineDescription(Specification os) throws ManifoldCFException, ServiceInterruption {
+ SpecPacker sp = new SpecPacker(os);
+ return new VersionContext(sp.toPackedString(), params, os);
+ }
+
+ /**
+  * Run OpenNLP named-entity extraction over a document and hand an enriched
+  * copy to the next pipeline stage.
+  * <p>
+  * The complete binary stream is buffered in memory and decoded as UTF-8
+  * text. The text is split into sentences, every sentence is tokenized, and
+  * the people, location and organization name finders are applied to the
+  * tokens. The names found are attached to a duplicate of the document under
+  * the PERSONS, LOCATIONS and ORGANIZATIONS field names, the buffered content
+  * is re-attached as the duplicate's binary stream, and the duplicate is
+  * sent downstream.
+  *
+  * @param documentURI
+  *          is the URI of the document; it is passed through unchanged to
+  *          the next stage.
+  * @param pipelineDescription
+  *          is the version context for this document; its specification
+  *          supplies the five model paths.
+  * @param document
+  *          is the document data to be processed.
+  * @param authorityNameString
+  *          is the name of the authority responsible for authorizing any
+  *          access tokens passed in with the repository document. May be
+  *          null. It is not used by this method.
+  * @param activities
+  *          is the handle used to send the modified document to the next
+  *          stage in the pipeline.
+  * @return the document status reported by the next stage.
+  * @throws IOException
+  *           if the document stream or a model file cannot be read.
+  */
+ @Override
+ public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
+     RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+     throws ManifoldCFException, ServiceInterruption, IOException {
+   // assumes use of Tika extractor before using this connector
+   Logging.agents.debug("Starting OpenNlp extraction");
+
+   final SpecPacker packer = new SpecPacker(pipelineDescription.getSpecification());
+
+   // Buffer the content so it can be analysed here and still be replayed downstream.
+   final byte[] content = IOUtils.toByteArray(document.getBinaryStream());
+
+   final SentenceDetector sentenceSplitter = OpenNlpExtractorConfig.sentenceDetector(packer.getSModelPath());
+   final Tokenizer wordSplitter = OpenNlpExtractorConfig.tokenizer(packer.getTModelPath());
+   final NameFinderME personFinder = OpenNlpExtractorConfig.peopleFinder(packer.getPModelPath());
+   final NameFinderME placeFinder = OpenNlpExtractorConfig.locationFinder(packer.getLModelPath());
+   final NameFinderME orgFinder = OpenNlpExtractorConfig.organizationFinder(packer.getOModelPath());
+
+   // Work on a duplicate so the incoming document object is left as received.
+   final RepositoryDocument enriched = document.duplicate();
+   final Map<String, List<String>> entitiesByField = new HashMap<>();
+
+   if (document.getBinaryLength() > 0) {
+     final String text = new String(content, StandardCharsets.UTF_8);
+     final List<String> people = new ArrayList<>();
+     final List<String> places = new ArrayList<>();
+     final List<String> organizations = new ArrayList<>();
+
+     for (final String sentence : sentenceSplitter.sentDetect(text)) {
+       final String[] tokens = wordSplitter.tokenize(sentence);
+       people.addAll(Arrays.asList(Span.spansToStrings(personFinder.find(tokens), tokens)));
+       places.addAll(Arrays.asList(Span.spansToStrings(placeFinder.find(tokens), tokens)));
+       organizations.addAll(Arrays.asList(Span.spansToStrings(orgFinder.find(tokens), tokens)));
+     }
+
+     entitiesByField.put(PERSONS, people);
+     entitiesByField.put(LOCATIONS, places);
+     entitiesByField.put(ORGANIZATIONS, organizations);
+   }
+
+   // The original stream has been consumed; replay the buffered bytes instead.
+   enriched.setBinary(new ByteArrayInputStream(content), content.length);
+
+   // Add named entity meta-data; iterating an empty map simply adds nothing.
+   for (final Entry<String, List<String>> field : entitiesByField.entrySet()) {
+     final List<String> names = field.getValue();
+     enriched.addField(field.getKey(), names.toArray(new String[names.size()]));
+   }
+
+   return activities.sendDocument(documentURI, enriched);
+ }
+
+ // ////////////////////////
+ // UI Methods
+ // ////////////////////////
+
+ /**
+  * Build the name of the javascript function that checks this connection's
+  * part of the job form.
+  *
+  * @param connectionSequenceNumber
+  *          is the unique number of this connection within the job.
+  * @return "s", the sequence number, then "_checkSpecification".
+  */
+ @Override
+ public String getFormCheckJavascriptMethodName(int connectionSequenceNumber) {
+   final StringBuilder methodName = new StringBuilder("s");
+   methodName.append(connectionSequenceNumber).append("_checkSpecification");
+   return methodName.toString();
+ }
+
+ /**
+  * Build the name of the javascript function that checks this connection's
+  * part of the job form just before the job is saved.
+  *
+  * @param connectionSequenceNumber
+  *          is the unique number of this connection within the job.
+  * @return "s", the sequence number, then "_checkSpecificationForSave".
+  */
+ @Override
+ public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber) {
+   final StringBuilder methodName = new StringBuilder("s");
+   methodName.append(connectionSequenceNumber).append("_checkSpecificationForSave");
+   return methodName.toString();
+ }
+
+ /**
+  * Emit the head-section output for the job editing page: register this
+  * connector's tab and render the javascript template used by the editing
+  * HTML.
+  *
+  * @param out
+  *          is the output to which any HTML should be sent.
+  * @param locale
+  *          is the preferred locale of the output.
+  * @param os
+  *          is the current specification for this job; it is not read here.
+  * @param connectionSequenceNumber
+  *          is the unique number of this connection within the job; it is
+  *          passed to the template as SEQNUM.
+  * @param tabsArray
+  *          is the list of tab names; the field-mapping tab name is
+  *          appended to it.
+  */
+ @Override
+ public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+     int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+   velocityContext.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+
+   // This connector contributes a single tab.
+   final String tabName = Messages.getString(locale, "OpenNlpExtractor.FieldMappingTabName");
+   tabsArray.add(tabName);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, velocityContext);
+ }
+
+ /**
+  * Emit the body-section output for the job editing page: the form elements
+  * used to edit this connection's specification. The generated HTML is
+  * placed inside the page's existing html, body and form tags; the form is
+  * named "editjob".
+  *
+  * @param out
+  *          is the output to which any HTML should be sent.
+  * @param locale
+  *          is the preferred locale of the output.
+  * @param os
+  *          is the current specification for this job; the model paths
+  *          shown in the form are read from it.
+  * @param connectionSequenceNumber
+  *          is the unique number of this connection within the job.
+  * @param actualSequenceNumber
+  *          is the connection within the job that has currently been
+  *          selected.
+  * @param tabName
+  *          is the current tab name.
+  */
+ @Override
+ public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber,
+     int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+
+   // Page-state values first, then the model paths taken from the specification.
+   velocityContext.put("TABNAME", tabName);
+   velocityContext.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+   velocityContext.put("SELECTEDNUM", Integer.toString(actualSequenceNumber));
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, velocityContext);
+ }
+
+ /**
+  * Process a specification post. This method is called at the start of a
+  * job's edit or view page, whenever there is a possibility that form data
+  * for a connection has been posted. For each of the five model-path fields
+  * that is present in the post, the matching specification node is replaced
+  * with one holding the posted value. Fields that were not posted leave the
+  * specification untouched, so a request without form data cannot erase a
+  * stored path, and repeated posts no longer pile up duplicate nodes. The
+  * name of the posted form is "editjob".
+  *
+  * @param variableContext
+  *          contains the post data, including binary file-upload
+  *          information.
+  * @param locale
+  *          is the preferred locale of the output.
+  * @param os
+  *          is the current specification for this job; it is modified in
+  *          place.
+  * @param connectionSequenceNumber
+  *          is the unique number of this connection within the job.
+  * @return null if all is well, or a string error message if there is an
+  *         error that should prevent saving of the job (and cause a
+  *         redirection to an error page). This implementation always
+  *         returns null.
+  */
+ @Override
+ public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+     int connectionSequenceNumber) throws ManifoldCFException {
+   final String seqPrefix = "s" + connectionSequenceNumber + "_";
+
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_SMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "smodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_TMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "tmodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_PMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "pmodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_LMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "lmodelpath"));
+   replaceModelPathNode(os, OpenNlpExtractorConfig.NODE_OMODEL_PATH,
+       variableContext.getParameter(seqPrefix + "omodelpath"));
+
+   return null;
+ }
+
+ /**
+  * Replace every node of the given type with a single node carrying the
+  * posted value. Does nothing when the value was not posted.
+  *
+  * @param os
+  *          is the specification to update.
+  * @param nodeType
+  *          is the node type to replace.
+  * @param postedValue
+  *          is the posted form value, or null if the field was absent.
+  */
+ private static void replaceModelPathNode(Specification os, String nodeType, String postedValue) {
+   if (postedValue == null) {
+     // Field not part of this post: keep whatever is already stored.
+     return;
+   }
+
+   // Drop earlier nodes of this type; the index only advances when nothing was removed.
+   int i = 0;
+   while (i < os.getChildCount()) {
+     if (os.getChild(i).getType().equals(nodeType)) {
+       os.removeChild(i);
+     } else {
+       i++;
+     }
+   }
+
+   final SpecificationNode node = new SpecificationNode(nodeType);
+   node.setAttribute(OpenNlpExtractorConfig.ATTRIBUTE_VALUE, postedValue);
+   os.addChild(os.getChildCount(), node);
+ }
+
+ /**
+  * Render the read-only view of this connection's specification for a job's
+  * view page. The generated HTML is placed inside the page's existing html
+  * and body tags.
+  *
+  * @param out
+  *          is the output to which any HTML should be sent.
+  * @param locale
+  *          is the preferred locale of the output.
+  * @param os
+  *          is the current specification for this job; the model paths
+  *          shown are read from it.
+  * @param connectionSequenceNumber
+  *          is the unique number of this connection within the job.
+  */
+ @Override
+ public void viewSpecification(IHTTPOutput out, Locale locale, Specification os, int connectionSequenceNumber)
+     throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<>();
+
+   // Sequence number first, then the model paths taken from the specification.
+   velocityContext.put("SEQNUM", Integer.toString(connectionSequenceNumber));
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, velocityContext);
+ }
+
+ protected static void fillInFieldMappingSpecificationMap(Map<String, Object> paramMap, Specification os) {
+ String sModelPath = "";
+ String tModelPath = "";
+ String pModelPath = "";
+ String lModelPath = "";
+ String oModelPath = "";
+
+ for (int i = 0; i < os.getChildCount(); i++) {
+ SpecificationNode sn = os.getChild(i);
+ if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+ sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (sModelPath == null) {
+ sModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+ tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (tModelPath == null) {
+ tModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+ pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (pModelPath == null) {
+ pModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+ lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (lModelPath == null) {
+ lModelPath = "";
+ }
+ } else if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+ oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+ if (oModelPath == null) {
+ oModelPath = "";
+ }
+ }
+
+ }
+ paramMap.put("SMODELPATH", sModelPath);
+ paramMap.put("TMODELPATH", tModelPath);
+ paramMap.put("PMODELPATH", pModelPath);
+ paramMap.put("LMODELPATH", lModelPath);
+ paramMap.put("OMODELPATH", oModelPath);
+ }
+
+ /**
+  * Immutable snapshot of the five model paths found in a specification,
+  * together with the logic that turns them into a version string.
+  */
+ protected static class SpecPacker {
+
+   private final String sModelPath;
+   private final String tModelPath;
+   private final String pModelPath;
+   private final String lModelPath;
+   private final String oModelPath;
+
+   /**
+    * Read the model paths from the specification. A path whose node is
+    * missing stays null; when several nodes share a type the last one wins.
+    *
+    * @param os
+    *          is the specification to read.
+    */
+   public SpecPacker(Specification os) {
+     String sModelPath = null;
+     String tModelPath = null;
+     String pModelPath = null;
+     String lModelPath = null;
+     String oModelPath = null;
+
+     for (int i = 0; i < os.getChildCount(); i++) {
+       SpecificationNode sn = os.getChild(i);
+
+       if (sn.getType().equals(OpenNlpExtractorConfig.NODE_SMODEL_PATH)) {
+         sModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+       }
+       if (sn.getType().equals(OpenNlpExtractorConfig.NODE_TMODEL_PATH)) {
+         tModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+       }
+       if (sn.getType().equals(OpenNlpExtractorConfig.NODE_PMODEL_PATH)) {
+         pModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+       }
+       if (sn.getType().equals(OpenNlpExtractorConfig.NODE_LMODEL_PATH)) {
+         lModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+       }
+       if (sn.getType().equals(OpenNlpExtractorConfig.NODE_OMODEL_PATH)) {
+         oModelPath = sn.getAttributeValue(OpenNlpExtractorConfig.ATTRIBUTE_VALUE);
+       }
+
+     }
+     this.sModelPath = sModelPath;
+     this.tModelPath = tModelPath;
+     this.pModelPath = pModelPath;
+     this.lModelPath = lModelPath;
+     this.oModelPath = oModelPath;
+   }
+
+   /**
+    * Build the version string for this configuration. Each path is written
+    * as a presence marker followed, when present, by its length, a colon and
+    * the path itself. Two strings are therefore equal only when all five
+    * paths are equal; plain concatenation could not guarantee that, since
+    * "ab" + "c" and "a" + "bc" produce the same text.
+    * NOTE(review): the string format differs from the undelimited form used
+    * before, so documents versioned with the old form will be seen as
+    * changed once.
+    *
+    * @return the packed version string.
+    */
+   public String toPackedString() {
+     StringBuilder sb = new StringBuilder();
+
+     packPath(sb, sModelPath);
+     packPath(sb, tModelPath);
+     packPath(sb, pModelPath);
+     packPath(sb, lModelPath);
+     packPath(sb, oModelPath);
+
+     return sb.toString();
+   }
+
+   /** Append one path as "-" when null, otherwise "+", its length, ":" and the value. */
+   private static void packPath(StringBuilder sb, String path) {
+     if (path == null) {
+       sb.append('-');
+     } else {
+       sb.append('+').append(path.length()).append(':').append(path);
+     }
+   }
+
+   /** @return the sentence model path, or null if none was specified. */
+   public String getSModelPath() {
+     return sModelPath;
+   }
+
+   /** @return the tokenizer model path, or null if none was specified. */
+   public String getTModelPath() {
+     return tModelPath;
+   }
+
+   /** @return the people model path, or null if none was specified. */
+   public String getPModelPath() {
+     return pModelPath;
+   }
+
+   /** @return the locations model path, or null if none was specified. */
+   public String getLModelPath() {
+     return lModelPath;
+   }
+
+   /** @return the organizations model path, or null if none was specified. */
+   public String getOModelPath() {
+     return oModelPath;
+   }
- }
+ }
}
Modified: manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java?rev=1726929&r1=1726928&r2=1726929&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java (original)
+++ manifoldcf/branches/CONNECTORS-1270/connectors/opennlp/connector/src/main/java/org/apache/manifoldcf/agents/transformation/opennlp/OpenNlpExtractorConfig.java Wed Jan 27 00:33:06 2016
@@ -32,11 +32,11 @@ import opennlp.tools.util.InvalidFormatE
public class OpenNlpExtractorConfig
{
- private static enum MODEL{
- SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
- }
-
- // Specification nodes and values
+ private static enum MODEL{ // tells initializeModel() which of the five models to load
+ SENTENCE, TOKENIZER, PEOPLE, LOCATIONS, ORGANIZATIONS;
+ }
+
+ // Specification nodes and values
public static final String NODE_SMODEL_PATH = "SModelPath";
public static final String NODE_TMODEL_PATH = "TModelPath";
public static final String NODE_PMODEL_PATH = "PModelPath";
@@ -52,45 +52,45 @@ public class OpenNlpExtractorConfig
private static TokenNameFinderModel oModel = null;
/**
 * Load the model selected by {@code m} from {@code path} into its static
 * field, unless that field is already set. The method is synchronized on the
 * class. The model file stream is closed as soon as the model has been read,
 * whether or not reading succeeds.
 *
 * @param m
 *          selects which model to load.
 * @param path
 *          is the file-system path of the model file.
 * @throws FileNotFoundException
 *           if the model file cannot be opened.
 * @throws IOException
 *           if the model file cannot be read.
 */
private static synchronized void initializeModel(MODEL m, String path) throws InvalidFormatException, FileNotFoundException, IOException{
- if(sModel == null && m == MODEL.SENTENCE)
- sModel = new SentenceModel(new FileInputStream(path));
- if(tModel == null && m == MODEL.TOKENIZER)
- tModel = new TokenizerModel(new FileInputStream(path));
- if(pModel == null && m == MODEL.PEOPLE)
- pModel = new TokenNameFinderModel(new FileInputStream(path));
- if(lModel == null && m == MODEL.LOCATIONS)
- lModel = new TokenNameFinderModel(new FileInputStream(path));
- if(oModel == null && m == MODEL.ORGANIZATIONS)
- oModel = new TokenNameFinderModel(new FileInputStream(path));
+ // Each stream is opened in try-with-resources so the file handle is released after loading.
+ if(sModel == null && m == MODEL.SENTENCE) {
+   try (FileInputStream in = new FileInputStream(path)) {
+     sModel = new SentenceModel(in);
+   }
+ }
+ if(tModel == null && m == MODEL.TOKENIZER) {
+   try (FileInputStream in = new FileInputStream(path)) {
+     tModel = new TokenizerModel(in);
+   }
+ }
+ if(pModel == null && m == MODEL.PEOPLE) {
+   try (FileInputStream in = new FileInputStream(path)) {
+     pModel = new TokenNameFinderModel(in);
+   }
+ }
+ if(lModel == null && m == MODEL.LOCATIONS) {
+   try (FileInputStream in = new FileInputStream(path)) {
+     lModel = new TokenNameFinderModel(in);
+   }
+ }
+ if(oModel == null && m == MODEL.ORGANIZATIONS) {
+   try (FileInputStream in = new FileInputStream(path)) {
+     oModel = new TokenNameFinderModel(in);
+   }
+ }
}
/**
 * Create a new sentence detector backed by the class-level sentence model.
 * The model is loaded from {@code path} only when no sentence model has been
 * loaded yet; otherwise {@code path} is not used.
 * NOTE(review): the null check runs outside the lock taken by
 * initializeModel() - confirm this is safe for concurrent callers.
 *
 * @param path
 *          is the path of the sentence model file.
 * @return a new SentenceDetectorME built on the loaded model.
 */
public static final SentenceDetector sentenceDetector(String path) throws InvalidFormatException, FileNotFoundException, IOException{
- if(sModel == null)
- initializeModel(MODEL.SENTENCE, path);
+ if(sModel == null)
+ initializeModel(MODEL.SENTENCE, path);
return new SentenceDetectorME(sModel);
}
/**
 * Create a new tokenizer backed by the class-level tokenizer model. The
 * model is loaded from {@code path} only when no tokenizer model has been
 * loaded yet; otherwise {@code path} is not used.
 * NOTE(review): the null check runs outside the lock taken by
 * initializeModel() - confirm this is safe for concurrent callers.
 *
 * @param path
 *          is the path of the tokenizer model file.
 * @return a new TokenizerME built on the loaded model.
 */
public static final Tokenizer tokenizer(String path) throws InvalidFormatException, FileNotFoundException, IOException{
- if(tModel == null)
- initializeModel(MODEL.TOKENIZER, path);
+ if(tModel == null)
+ initializeModel(MODEL.TOKENIZER, path);
return new TokenizerME(tModel);
}
/**
 * Create a new name finder backed by the class-level people model. The model
 * is loaded from {@code path} only when no people model has been loaded yet;
 * otherwise {@code path} is not used.
 * NOTE(review): the null check runs outside the lock taken by
 * initializeModel() - confirm this is safe for concurrent callers.
 *
 * @param path
 *          is the path of the people model file.
 * @return a new NameFinderME built on the loaded model.
 */
public static final NameFinderME peopleFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
- if(pModel == null)
- initializeModel(MODEL.PEOPLE, path);
+ if(pModel == null)
+ initializeModel(MODEL.PEOPLE, path);
return new NameFinderME(pModel);
}
/**
 * Create a new name finder backed by the class-level locations model. The
 * model is loaded from {@code path} only when no locations model has been
 * loaded yet; otherwise {@code path} is not used.
 * NOTE(review): the null check runs outside the lock taken by
 * initializeModel() - confirm this is safe for concurrent callers.
 *
 * @param path
 *          is the path of the locations model file.
 * @return a new NameFinderME built on the loaded model.
 */
public static final NameFinderME locationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
- if(lModel == null)
- initializeModel(MODEL.LOCATIONS, path);
+ if(lModel == null)
+ initializeModel(MODEL.LOCATIONS, path);
return new NameFinderME(lModel);
}
/**
 * Create a new name finder backed by the class-level organizations model.
 * The model is loaded from {@code path} only when no organizations model has
 * been loaded yet; otherwise {@code path} is not used.
 * NOTE(review): the null check runs outside the lock taken by
 * initializeModel() - confirm this is safe for concurrent callers.
 *
 * @param path
 *          is the path of the organizations model file.
 * @return a new NameFinderME built on the loaded model.
 */
public static final NameFinderME organizationFinder(String path) throws InvalidFormatException, FileNotFoundException, IOException{
- if(oModel == null)
- initializeModel(MODEL.ORGANIZATIONS, path);
+ if(oModel == null)
+ initializeModel(MODEL.ORGANIZATIONS, path);
return new NameFinderME(oModel);
}