You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2018/02/28 04:43:11 UTC

[01/10] any23 git commit: ANY23-321 Add openie toggle functionality to service

Repository: any23
Updated Branches:
  refs/heads/master 66ce1241a -> 394d36a0c


ANY23-321 Add openie toggle functionality to service


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/706e891c
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/706e891c
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/706e891c

Branch: refs/heads/master
Commit: 706e891cf582736f90cfbe83bc1ef5d629e6dfd7
Parents: 0613280
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jan 3 00:05:39 2018 +0000
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jan 3 00:05:39 2018 +0000

----------------------------------------------------------------------
 .../apache/any23/extractor/ExtractorGroup.java  |   1 +
 .../apache/any23/plugin/Any23PluginManager.java |  23 +--
 core/src/main/java/org/apache/any23/Any23.java  |   8 +-
 .../any23/extractor/ExtractorRegistryImpl.java  |  11 +-
 openie/pom.xml                                  | 152 -----------------
 .../any23/extractor/openie/OpenIEExtractor.java | 130 ---------------
 .../openie/OpenIEExtractorFactory.java          |  52 ------
 .../org.apache.any23.extractor.ExtractorFactory |   1 -
 .../any23/openie/OpenIEExtractorTest.java       |  88 ----------
 .../htmlscraper/HTMLScraperExtractor.java       |  12 +-
 plugins/integration-test/pom.xml                |   5 +
 .../java/org/apache/any23/plugin/PluginIT.java  |  11 +-
 plugins/openie/pom.xml                          | 165 +++++++++++++++++++
 .../extractor/openie/OpenIEExtractor.java       | 137 +++++++++++++++
 .../openie/OpenIEExtractorFactory.java          |  52 ++++++
 .../org.apache.any23.extractor.ExtractorFactory |   1 +
 .../any23/openie/OpenIEExtractorTest.java       |  88 ++++++++++
 pom.xml                                         |   6 +-
 service/README.md                               |  49 ++++++
 service/README.txt                              |  50 ------
 service/pom.xml                                 |  84 +++++++++-
 .../java/org/apache/any23/servlet/Servlet.java  |  48 +++++-
 service/src/main/resources/form.html            |  59 ++++++-
 .../main/webapp/resources/js/bootstrap-modal.js |  22 ++-
 src/site/apt/any23-plugins.apt                  |   9 +-
 25 files changed, 734 insertions(+), 530 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java b/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
index 9242ea6..4e77690 100644
--- a/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
+++ b/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
@@ -61,6 +61,7 @@ public class ExtractorGroup implements Iterable<ExtractorFactory<?>> {
         return new ExtractorGroup(matching);
     }
 
+    @Override
     public Iterator<ExtractorFactory<?>> iterator() {
         return factories.iterator();
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java b/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
index 5898210..3cd0829 100644
--- a/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
+++ b/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
@@ -116,7 +116,7 @@ public class Any23PluginManager {
      * @return list of exceptions raised during the loading.
      */
     public synchronized Throwable[] loadJARs(File... jars) {
-        final List<Throwable> result = new ArrayList<Throwable>();
+        final List<Throwable> result = new ArrayList<>();
         for (File jar : jars) {
             try {
                 loadJAR(jar);
@@ -158,7 +158,7 @@ public class Any23PluginManager {
      * @return  list of exceptions raised during the loading.
      */
     public synchronized Throwable[] loadClassDirs(File... classDirs) {
-        final List<Throwable> result = new ArrayList<Throwable>();
+        final List<Throwable> result = new ArrayList<>();
         for (File classDir : classDirs) {
             try {
                 loadClassDir(classDir);
@@ -178,14 +178,15 @@ public class Any23PluginManager {
      * Loads all the JARs detected in a given directory.
      *
      * @param jarDir directory containing the JARs to be loaded.
+     *     Example '/usr/local/apache-tomcat-7.0.72/webapps/apache-any23-service-2.2-SNAPSHOT/WEB-INF/lib/apache-any23-openie'
      * @return <code>true</code> if all JARs in dir are loaded.
      */
     public synchronized boolean loadJARDir(File jarDir) {
         if(jarDir == null)
             throw new NullPointerException("JAR dir must be not null.");
-        if(  ! jarDir.exists() )
+        if(!jarDir.exists() )
             throw new IllegalArgumentException("Given directory doesn't exist:" + jarDir.getAbsolutePath());
-        if(! jarDir.isDirectory() )
+        if(!jarDir.isDirectory() )
             throw new IllegalArgumentException(
                     "given file exists and it is not a directory: " + jarDir.getAbsolutePath()
             );
@@ -210,7 +211,7 @@ public class Any23PluginManager {
      * @return list of errors occurred during loading.
      */
     public synchronized Throwable[] loadFiles(File... files) {
-        final List<Throwable> errors = new ArrayList<Throwable>();
+        final List<Throwable> errors = new ArrayList<>();
         for(File file : files) {
             try {
                 if (file.isFile() && file.getName().endsWith(".jar")) {
@@ -263,6 +264,7 @@ public class Any23PluginManager {
      * @return not <code>null</code> list of plugin classes.
      * @throws IOException if there is an error obtaining Extractors.
      */
+    @SuppressWarnings("rawtypes")
     public synchronized Iterator<ExtractorFactory> getExtractors() throws IOException {
         return getPlugins(ExtractorFactory.class);
     }
@@ -312,7 +314,8 @@ public class Any23PluginManager {
 
         final StringBuilder report = new StringBuilder();
         try {
-            final List<ExtractorFactory<?>> newFactoryList = new ArrayList<ExtractorFactory<?>>();
+            final List<ExtractorFactory<?>> newFactoryList = new ArrayList<>();
+            @SuppressWarnings("rawtypes")
             Iterator<ExtractorFactory> extractors = getExtractors();
             while (extractors.hasNext()) {
                 ExtractorFactory<?> factory = extractors.next();
@@ -386,7 +389,7 @@ public class Any23PluginManager {
      */
     private File[] getPluginLocations(String pluginDirsList) {
         final String[] locationsStr = pluginDirsList.split(PLUGIN_DIRS_LIST_SEPARATOR);
-        final List<File> locations = new ArrayList<File>();
+        final List<File> locations = new ArrayList<>();
         for(String locationStr : locationsStr) {
             final File location = new File(locationStr);
             if( ! location.exists()) {
@@ -404,7 +407,7 @@ public class Any23PluginManager {
      */
     private static final class DynamicClassLoader extends URLClassLoader {
 
-        private final Set<String> addedURLs = new HashSet<String>();
+        private final Set<String> addedURLs = new HashSet<>();
 
         private final List<File> jars;
 
@@ -412,8 +415,8 @@ public class Any23PluginManager {
 
         public DynamicClassLoader(URL[] urls) {
             super(urls, Any23PluginManager.class.getClassLoader());
-            jars = new ArrayList<File>();
-            dirs = new ArrayList<File>();
+            jars = new ArrayList<>();
+            dirs = new ArrayList<>();
         }
 
         public DynamicClassLoader() {

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/core/src/main/java/org/apache/any23/Any23.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/Any23.java b/core/src/main/java/org/apache/any23/Any23.java
index 9be8a28..cba13d8 100644
--- a/core/src/main/java/org/apache/any23/Any23.java
+++ b/core/src/main/java/org/apache/any23/Any23.java
@@ -98,7 +98,8 @@ public class Any23 {
      * @param extractorGroup the group of extractors to be applied.
      */
     public Any23(Configuration configuration, ExtractorGroup extractorGroup) {
-        if(configuration == null) throw new NullPointerException("configuration must be not null.");
+        if(configuration == null)
+            throw new NullPointerException("configuration must be not null.");
         this.configuration = configuration;
         logger.debug( configuration.getConfigurationDump() );
 
@@ -259,7 +260,8 @@ public class Any23 {
      * @throws IOException if an error occurs while initializing the internal {@link org.apache.any23.http.HTTPClient}.
      */
     public DocumentSource createDocumentSource(String documentIRI) throws URISyntaxException, IOException {
-        if(documentIRI == null) throw new NullPointerException("documentIRI cannot be null.");
+        if(documentIRI == null)
+            throw new NullPointerException("documentIRI cannot be null.");
         if (documentIRI.toLowerCase().startsWith("file:")) {
             return new FileDocumentSource( new File(new URI(documentIRI)) );
         }
@@ -453,7 +455,7 @@ public class Any23 {
     }
 
     private String getAcceptHeader() {
-        Collection<MIMEType> mimeTypes = new ArrayList<MIMEType>();
+        Collection<MIMEType> mimeTypes = new ArrayList<>();
         for (ExtractorFactory<?> factory : factories) {
             mimeTypes.addAll(factory.getSupportedMIMETypes());
         }

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java b/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
index 86dc982..ca3bb98 100644
--- a/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
+++ b/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
@@ -30,9 +30,15 @@ import java.util.List;
  *  Singleton class acting as a register for all the various
  *  {@link Extractor}.
  */
+@SuppressWarnings("rawtypes")
 public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service.ServiceRegistry<String, ExtractorFactory> implements ExtractorRegistry {
 
     /**
+     * The instance.
+     */
+    private static ExtractorRegistry instance = null;
+
+    /**
      * Public constructor for ExtractorRegistryImpl. Should normally call getInstance.
      */
     public ExtractorRegistryImpl() {
@@ -40,11 +46,6 @@ public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service
     }
 
     /**
-     * The instance.
-     */
-    private static ExtractorRegistry instance = null;
-
-    /**
      * @return returns the {@link ExtractorRegistry} instance.
      */
     public static ExtractorRegistry getInstance() {

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/openie/pom.xml
----------------------------------------------------------------------
diff --git a/openie/pom.xml b/openie/pom.xml
deleted file mode 100644
index 7440812..0000000
--- a/openie/pom.xml
+++ /dev/null
@@ -1,152 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <artifactId>apache-any23</artifactId>
-    <groupId>org.apache.any23</groupId>
-    <version>2.2-SNAPSHOT</version>
-    <relativePath>../</relativePath>
-  </parent>
-
-  <repositories>
-    <repository>
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-      <id>bintray-allenai-maven</id>
-      <name>bintray</name>
-      <url>http://allenai.bintray.com/maven</url>
-    </repository>
-  </repositories>
-  <pluginRepositories>
-    <pluginRepository>
-      <snapshots>
-        <enabled>false</enabled>
-      </snapshots>
-      <id>bintray-allenai-maven</id>
-      <name>bintray-plugins</name>
-      <url>http://allenai.bintray.com/maven</url>
-    </pluginRepository>
-  </pluginRepositories>
-
-  <artifactId>apache-any23-openie</artifactId>
-
-  <name>Apache Any23 :: OpenIE</name>
-  <description>Open Information Extraction module.</description>
-
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>apache-any23-core</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>apache-any23-test-resources</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-      <type>test-jar</type>
-    </dependency>
-    <dependency>
-      <groupId>org.allenai.openie</groupId>
-      <artifactId>openie_2.11</artifactId>
-      <version>4.2.6</version>
-      <scope>compile</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.allenai.openie</groupId>
-      <artifactId>openie_2.11</artifactId>
-      <version>4.2.6</version>
-      <scope>compile</scope>
-      <type>pom</type>
-    </dependency>
-    <dependency>
-      <groupId>edu.washington.cs.knowitall</groupId>
-      <artifactId>openregex</artifactId>
-      <version>1.1.1</version>
-      <scope>runtime</scope>
-    </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-log4j12</artifactId>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <skipTests>true</skipTests>
-        </configuration>
-      </plugin>
-    </plugins>
-    <pluginManagement>
-      <plugins>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-assembly-plugin</artifactId>
-          <version>${maven-assembly-plugin.version}</version>
-          <executions>
-            <execution>
-              <id>assembly</id>
-              <phase>package</phase>
-              <goals>
-                <goal>single</goal>
-              </goals>
-            </execution>
-          </executions>
-          <configuration>
-            <attach>true</attach>
-            <skipAssembly>true</skipAssembly>
-            <tarLongFileMode>gnu</tarLongFileMode>
-          </configuration>
-        </plugin>
-      </plugins>
-    </pluginManagement>
-  </build>
-
-  <profiles>
-    <profile>
-      <id>release</id>
-      <build>
-        <resources>
-          <resource>
-            <directory>${basedir}/../</directory>
-            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
-            <includes>
-              <include>LICENSE.txt</include>
-              <include>NOTICE.txt</include>
-            </includes>
-          </resource>
-        </resources>
-      </build>
-    </profile>
-
-  </profiles>
-
-</project>

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
deleted file mode 100644
index 812ed9c..0000000
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.extractor.openie;
-
-import java.io.IOException;
-import java.util.List;
-
-import javax.xml.transform.TransformerConfigurationException;
-import javax.xml.transform.TransformerFactoryConfigurationError;
-
-import org.apache.any23.extractor.Extractor;
-import org.apache.any23.configuration.Configuration;
-import org.apache.any23.configuration.DefaultConfiguration;
-import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.util.StreamUtils;
-import org.apache.tika.Tika;
-import org.apache.tika.exception.TikaException;
-import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.Resource;
-import org.eclipse.rdf4j.model.Value;
-import org.eclipse.rdf4j.model.vocabulary.RDF;
-import org.eclipse.rdf4j.model.vocabulary.RDFS;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
-import org.apache.any23.extractor.ExtractionResult;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Document;
-
-import edu.knowitall.openie.Argument;
-import edu.knowitall.openie.Instance;
-import edu.knowitall.openie.OpenIE;
-import edu.knowitall.tool.parse.ClearParser;
-import edu.knowitall.tool.postag.ClearPostagger;
-import edu.knowitall.tool.srl.ClearSrl;
-import edu.knowitall.tool.tokenize.ClearTokenizer;
-import scala.collection.JavaConversions;
-import scala.collection.Seq;
-
-/**
- * An <a href="https://github.com/allenai/openie-standalone">OpenIE</a> 
- * extractor able to generate <i>RDF</i> statements from 
- * sentences representing relations in the text.
- */
-public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
-
-    private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
-
-    private IRI documentRoot;
-
-    /**
-     * default constructor
-     */
-    public OpenIEExtractor() {
-        // default constructor
-    }
-
-    /**
-     * @see org.apache.any23.extractor.Extractor#getDescription()
-     */
-    @Override
-    public ExtractorDescription getDescription() {
-        return OpenIEExtractorFactory.getDescriptionInstance();
-    }
-
-    @Override
-    public void run(ExtractionParameters extractionParameters,
-            ExtractionContext context, Document in, ExtractionResult out)
-                    throws IOException, ExtractionException {
-
-        IRI documentIRI = context.getDocumentIRI();
-        documentRoot = RDFUtils.iri(documentIRI.toString() + "root");
-        out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
-        out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
-        LOG.debug("Processing: {}", documentIRI.toString());
-
-        OpenIE openIE = new OpenIE(
-                new ClearParser(
-                        new ClearPostagger(
-                                new ClearTokenizer())), new ClearSrl(), false, false);
-
-        Seq<Instance> extractions = null;
-        Tika tika = new Tika();
-        try {
-            extractions = openIE.extract(tika.parseToString(StreamUtils.documentToInputStream(in)));
-        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
-            LOG.error("Encountered error during OpenIE extraction.", e);
-        } catch (TikaException e) {
-            LOG.error("Encountered error whilst parsing InputStream with Tika.", e);
-        }
-
-        List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);
-        // for each extraction instance we can obtain a number of extraction elements
-        // instance.confidence() - a confidence value for the extraction itself
-        // instance.extr().context() - an optional representation of the context for this extraction
-        // instance.extr().arg1().text() - subject
-        // instance.extr().rel().text() - predicate
-        // instance.extr().arg2s().text() - object
-        final Configuration immutableConf = DefaultConfiguration.singleton();
-        Double threshold = Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"));
-        for(Instance instance : listExtractions) {
-            if (instance.confidence() > threshold) {
-                List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
-                for(Argument argument : listArg2s) {
-                    Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);
-                    IRI predicate = (IRI) RDFUtils.makeIRI(instance.extr().rel().text(), documentIRI);
-                    Value object = RDFUtils.toValue(argument.text());
-                    out.writeTriple(subject, predicate, object);
-                }
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
deleted file mode 100644
index 31760d2..0000000
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.extractor.openie;
-
-import java.util.Arrays;
-
-import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.Prefixes;
-
-/**
- * @author lewismc
- *
- */
-public class OpenIEExtractorFactory extends SimpleExtractorFactory<OpenIEExtractor>
-    implements ExtractorFactory<OpenIEExtractor> {
-
-    public static final String NAME = "openie";
-
-    public static final Prefixes prefixes = null;
-
-    private static final ExtractorDescription descriptionInstance = new OpenIEExtractorFactory();
-
-    public OpenIEExtractorFactory() {
-        super(NAME, prefixes, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), "example-openie.html");
-    }
-
-    @Override
-    public OpenIEExtractor createExtractor() {
-        return new OpenIEExtractor();
-    }
-
-    public static ExtractorDescription getDescriptionInstance() {
-        return descriptionInstance;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
----------------------------------------------------------------------
diff --git a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
deleted file mode 100644
index 4faf7ce..0000000
--- a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
+++ /dev/null
@@ -1 +0,0 @@
-org.apache.any23.extractor.openie.OpenIEExtractorFactory
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
deleted file mode 100644
index 9455311..0000000
--- a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.openie;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-
-import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
-import org.apache.any23.extractor.ExtractionResult;
-import org.apache.any23.extractor.ExtractionResultImpl;
-import org.apache.any23.extractor.openie.OpenIEExtractor;
-import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.util.StreamUtils;
-import org.apache.any23.writer.RDFXMLWriter;
-import org.apache.any23.writer.TripleHandler;
-import org.apache.any23.writer.TripleHandlerException;
-import org.eclipse.rdf4j.model.IRI;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * @author lewismc
- *
- */
-public class OpenIEExtractorTest {
-
-    private static final Logger logger = LoggerFactory.getLogger(OpenIEExtractorTest.class);
-
-    private OpenIEExtractor extractor;
-
-    @Before
-    public void setUp() throws Exception {
-        extractor = new OpenIEExtractor();
-    }
-
-    @After
-    public void tearDown() throws Exception {
-        extractor = null;
-    }
-
-    @Test
-    public void testExtractFromHTMLDocument() 
-      throws IOException, ExtractionException, TripleHandlerException {
-        final IRI uri = RDFUtils.iri("http://podaac.jpl.nasa.gov/aquarius");
-        extract(uri, "/org/apache/any23/extractor/openie/example-openie.html");
-    }
-    
-    public void extract(IRI uri, String filePath) 
-      throws IOException, ExtractionException, TripleHandlerException {
-      FileOutputStream fos = new FileOutputStream(File.createTempFile("OpenIEExtractorTest", "tmp"));
-      final TripleHandler tHandler = new RDFXMLWriter(fos);
-      final ExtractionContext extractionContext = new ExtractionContext("rdf-openie", uri);
-      final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
-      try {
-        extractor.run(
-                ExtractionParameters.newDefault(),
-                extractionContext,
-                StreamUtils.inputStreamToDocument(this.getClass().getResourceAsStream(filePath)),
-                result
-        );
-      } finally {
-        logger.debug(fos.toString());
-        tHandler.close();
-        result.close();
-      }
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
index ab7d34a..94a3210 100644
--- a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
+++ b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
@@ -46,16 +46,16 @@ import java.util.List;
  */
 public class HTMLScraperExtractor implements Extractor.ContentExtractor {
 
-    public final static IRI PAGE_CONTENT_DE_PROPERTY  =
+    public static final IRI PAGE_CONTENT_DE_PROPERTY  =
             SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/de");
-    public final static IRI PAGE_CONTENT_AE_PROPERTY  =
+    public static final IRI PAGE_CONTENT_AE_PROPERTY  =
             SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ae");
-    public final static IRI PAGE_CONTENT_LCE_PROPERTY =
+    public static final IRI PAGE_CONTENT_LCE_PROPERTY =
             SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/lce");
-    public final static IRI PAGE_CONTENT_CE_PROPERTY  =
+    public static final IRI PAGE_CONTENT_CE_PROPERTY  =
             SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ce");
 
-    private final List<ExtractionRule> extractionRules = new ArrayList<ExtractionRule>();
+    private final List<ExtractionRule> extractionRules = new ArrayList<>();
 
     public HTMLScraperExtractor() {
         loadDefaultRules();
@@ -66,7 +66,7 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor {
     }
 
     public String[] getTextExtractors() {
-        final List<String> extractors = new ArrayList<String>();
+        final List<String> extractors = new ArrayList<>();
         for(ExtractionRule er : extractionRules) {
             extractors.add(er.name);
         }

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/integration-test/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/integration-test/pom.xml b/plugins/integration-test/pom.xml
index ab062cd..c99a7e0 100644
--- a/plugins/integration-test/pom.xml
+++ b/plugins/integration-test/pom.xml
@@ -57,6 +57,11 @@
       <artifactId>apache-any23-basic-crawler</artifactId>
       <version>1.0.6-SNAPSHOT</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.any23.plugins</groupId>
+      <artifactId>apache-any23-openie</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
 
     <!-- BEGIN: Test Dependencies -->
     <dependency>

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
----------------------------------------------------------------------
diff --git a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
index 1b69463..e8e4505 100644
--- a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
+++ b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
@@ -56,6 +56,9 @@ public class PluginIT {
     private static final File CRAWLER_TARGET_DIR     = new File(PLUGIN_DIR + "basic-crawler/target/classes");
     private static final File CRAWLER_DEPENDENCY_DIR = new File(PLUGIN_DIR + "basic-crawler/target/dependency");
 
+    private static final File OPENIE_TARGET_DIR     = new File(PLUGIN_DIR + "openie/target/classes");
+    private static final File OPENIE_DEPENDENCY_DIR = new File(PLUGIN_DIR + "openie/target/dependency");
+
     private Any23PluginManager manager;
 
     @Before
@@ -79,13 +82,15 @@ public class PluginIT {
     public void testDetectExtractorPlugins() throws IOException, InstantiationException, IllegalAccessException {
         final ExtractorGroup extractorGroup = manager.getApplicableExtractors(
                 new ExtractorRegistryImpl(),
-                HTML_SCRAPER_TARGET_DIR,  // Required to satisfy class dependencies.
+                HTML_SCRAPER_TARGET_DIR,
                 HTML_SCRAPER_DEPENDENCY_DIR,
                 OFFICE_SCRAPER_TARGET_DIR,
-                OFFICE_SCRAPER_DEPENDENCY_DIR // Required to satisfy class dependencies.
+                OFFICE_SCRAPER_DEPENDENCY_DIR,
+                OPENIE_TARGET_DIR,
+                OPENIE_DEPENDENCY_DIR
         );
         try {
-          Class.forName("org.apache.any23.extractor.openie.OpenIEExtractor", false, this.getClass().getClassLoader());
+          Class.forName("org.apache.any23.plugin.extractor.openie.OpenIEExtractor", false, this.getClass().getClassLoader());
           assertEquals("Did not find the number of expected extractors", NUM_OF_EXTRACTORS_INCL_OPENIE ,
                   extractorGroup.getNumOfExtractors()
           );

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/openie/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/openie/pom.xml b/plugins/openie/pom.xml
new file mode 100644
index 0000000..64c6806
--- /dev/null
+++ b/plugins/openie/pom.xml
@@ -0,0 +1,165 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.2-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.any23.plugins</groupId>
+  <artifactId>apache-any23-openie</artifactId>
+
+  <name>Apache Any23 :: Plugins :: OpenIE</name>
+  <description>Open Information Extraction module.</description>
+
+  <repositories>
+    <repository>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+      <id>bintray-allenai-maven</id>
+      <name>bintray</name>
+      <url>http://allenai.bintray.com/maven</url>
+    </repository>
+  </repositories>
+  <pluginRepositories>
+    <pluginRepository>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+      <id>bintray-allenai-maven</id>
+      <name>bintray-plugins</name>
+      <url>http://allenai.bintray.com/maven</url>
+    </pluginRepository>
+  </pluginRepositories>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-test-resources</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>${openie_2.11.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>${openie_2.11.version}</version>
+      <scope>compile</scope>
+      <type>pom</type>
+    </dependency>
+    <dependency>
+      <groupId>edu.washington.cs.knowitall</groupId>
+      <artifactId>openregex</artifactId>
+      <version>${openregex.version}</version>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <skipTests>true</skipTests>
+        </configuration>
+      </plugin>
+      <!-- Generates the distribution package -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <appendAssemblyId>false</appendAssemblyId>
+          <descriptors>
+            <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
+          </descriptors>
+        </configuration>
+      </plugin>
+    </plugins>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-assembly-plugin</artifactId>
+          <version>${maven-assembly-plugin.version}</version>
+          <executions>
+            <execution>
+              <id>assembly</id>
+              <phase>package</phase>
+              <goals>
+                <goal>single</goal>
+              </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <attach>true</attach>
+            <skipAssembly>true</skipAssembly>
+            <tarLongFileMode>gnu</tarLongFileMode>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+
+  </profiles>
+
+</project>

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
new file mode 100644
index 0000000..1b6a9cf
--- /dev/null
+++ b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.plugin.extractor.openie;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.configuration.Configuration;
+import org.apache.any23.configuration.DefaultConfiguration;
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.plugin.Author;
+import org.apache.any23.plugin.ExtractorPlugin;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+
+import edu.knowitall.openie.Argument;
+import edu.knowitall.openie.Instance;
+import edu.knowitall.openie.OpenIE;
+import edu.knowitall.tool.parse.ClearParser;
+import edu.knowitall.tool.postag.ClearPostagger;
+import edu.knowitall.tool.srl.ClearSrl;
+import edu.knowitall.tool.tokenize.ClearTokenizer;
+import scala.collection.JavaConversions;
+import scala.collection.Seq;
+
+/**
+ * An <a href="https://github.com/allenai/openie-standalone">OpenIE</a>
+ * extractor able to generate <i>RDF</i> statements from
+ * sentences representing relations in the text.
+ *
+ * <p>For every extraction whose confidence exceeds the configured threshold,
+ * one triple is written per second argument: the first argument becomes the
+ * subject, the relation becomes the predicate and the second argument becomes
+ * the object.</p>
+ */
+@Author(name="Lewis John McGibbney (lewismc@apache.org)")
+public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor, ExtractorPlugin {
+
+    private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
+
+    /** Configuration key holding the confidence an extraction must exceed to be emitted. */
+    private static final String CONFIDENCE_THRESHOLD_PROPERTY =
+            "any23.extraction.openie.confidence.threshold";
+
+    /** Threshold applied when the configuration does not define the property above. */
+    private static final String DEFAULT_CONFIDENCE_THRESHOLD = "0.5";
+
+    /**
+     * default constructor
+     */
+    public OpenIEExtractor() {
+        // default constructor
+    }
+
+    /**
+     * @see org.apache.any23.extractor.Extractor#getDescription()
+     */
+    @Override
+    public ExtractorDescription getDescription() {
+        return OpenIEExtractorFactory.getDescriptionInstance();
+    }
+
+    /**
+     * Converts the document to plain text with Tika, runs OpenIE over that text
+     * and writes the sufficiently confident relations to <code>out</code>.
+     *
+     * <p>If the document cannot be converted to text the failure is logged and
+     * no triples are written; the namespaces have already been declared on
+     * <code>out</code> at that point.</p>
+     *
+     * @param extractionParameters extraction parameters (not read by this extractor).
+     * @param context supplies the document IRI used to build subject and predicate IRIs.
+     * @param in the DOM of the document to process.
+     * @param out receives the namespaces and the generated triples.
+     * @throws IOException if Tika fails to read the document content.
+     * @throws ExtractionException declared by the extractor contract.
+     */
+    @Override
+    public void run(ExtractionParameters extractionParameters,
+            ExtractionContext context, Document in, ExtractionResult out)
+                    throws IOException, ExtractionException {
+
+        final IRI documentIRI = context.getDocumentIRI();
+        out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
+        out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
+        LOG.debug("Processing: {}", documentIRI);
+
+        // NOTE(review): the meaning of the two trailing boolean flags is not
+        // visible from here; confirm against the OpenIE constructor documentation.
+        final OpenIE openIE = new OpenIE(
+                new ClearParser(
+                        new ClearPostagger(
+                                new ClearTokenizer())), new ClearSrl(), false, false);
+
+        final Seq<Instance> extractions;
+        final Tika tika = new Tika();
+        try {
+            extractions = openIE.extract(tika.parseToString(StreamUtils.documentToInputStream(in)));
+        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
+            LOG.error("Encountered error during OpenIE extraction.", e);
+            // Nothing was extracted: stop here instead of dereferencing a null result.
+            return;
+        } catch (TikaException e) {
+            LOG.error("Encountered error whilst parsing InputStream with Tika.", e);
+            // Nothing was extracted: stop here instead of dereferencing a null result.
+            return;
+        }
+        if (extractions == null) {
+            return;
+        }
+
+        final List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);
+        // for each extraction instance we can obtain a number of extraction elements
+        // instance.confidence() - a confidence value for the extraction itself
+        // instance.extr().context() - an optional representation of the context for this extraction
+        // instance.extr().arg1().text() - subject
+        // instance.extr().rel().text() - predicate
+        // instance.extr().arg2s().text() - object
+        final Configuration immutableConf = DefaultConfiguration.singleton();
+        final double threshold = Double.parseDouble(
+                immutableConf.getProperty(CONFIDENCE_THRESHOLD_PROPERTY, DEFAULT_CONFIDENCE_THRESHOLD));
+        for(Instance instance : listExtractions) {
+            if (instance.confidence() > threshold) {
+                List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
+                for(Argument argument : listArg2s) {
+                    Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);
+                    IRI predicate = (IRI) RDFUtils.makeIRI(instance.extr().rel().text(), documentIRI);
+                    Value object = RDFUtils.toValue(argument.text());
+                    out.writeTriple(subject, predicate, object);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the shared {@link OpenIEExtractorFactory} instance, which is
+     * exposed by the factory class as an {@link ExtractorDescription} and is
+     * therefore cast back to {@link ExtractorFactory} here.
+     */
+    @Override
+    public ExtractorFactory<?> getExtractorFactory() {
+      return (ExtractorFactory<?>) OpenIEExtractorFactory.getDescriptionInstance();
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java
----------------------------------------------------------------------
diff --git a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java
new file mode 100644
index 0000000..1c86c62
--- /dev/null
+++ b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.plugin.extractor.openie;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * {@link ExtractorFactory} producing {@link OpenIEExtractor} instances. One
+ * shared instance of this factory is also handed out as the extractor's
+ * {@link ExtractorDescription}.
+ *
+ * @author lewismc
+ */
+public class OpenIEExtractorFactory extends SimpleExtractorFactory<OpenIEExtractor>
+        implements ExtractorFactory<OpenIEExtractor> {
+
+    /** Extractor name handed to the {@link SimpleExtractorFactory} constructor. */
+    public static final String NAME = "openie";
+
+    /** No prefixes are supplied for this extractor. */
+    public static final Prefixes prefixes = null;
+
+    /** Shared factory instance returned by {@link #getDescriptionInstance()}. */
+    private static final ExtractorDescription SHARED_DESCRIPTION = new OpenIEExtractorFactory();
+
+    /**
+     * Creates the factory. The list and file name passed to the superclass are
+     * presumably the supported MIME types and the example input resource;
+     * confirm against {@link SimpleExtractorFactory}.
+     */
+    public OpenIEExtractorFactory() {
+        super(
+                NAME,
+                prefixes,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-openie.html");
+    }
+
+    /**
+     * @return the shared factory instance, typed as an {@link ExtractorDescription}.
+     */
+    public static ExtractorDescription getDescriptionInstance() {
+        return SHARED_DESCRIPTION;
+    }
+
+    /**
+     * @return a new {@link OpenIEExtractor}.
+     */
+    @Override
+    public OpenIEExtractor createExtractor() {
+        return new OpenIEExtractor();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
----------------------------------------------------------------------
diff --git a/plugins/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/plugins/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
new file mode 100644
index 0000000..10ebf16
--- /dev/null
+++ b/plugins/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -0,0 +1 @@
+org.apache.any23.plugin.extractor.openie.OpenIEExtractorFactory
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/plugins/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/plugins/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/plugins/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
new file mode 100644
index 0000000..dcc4e8f
--- /dev/null
+++ b/plugins/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractionResultImpl;
+import org.apache.any23.plugin.extractor.openie.OpenIEExtractor;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.any23.writer.RDFXMLWriter;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.eclipse.rdf4j.model.IRI;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Smoke test for the {@link OpenIEExtractor}: runs the extractor over an
+ * example HTML document and serialises the result as RDF/XML into a
+ * temporary file.
+ *
+ * @author lewismc
+ */
+public class OpenIEExtractorTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(OpenIEExtractorTest.class);
+
+    private OpenIEExtractor extractor;
+
+    @Before
+    public void setUp() throws Exception {
+        extractor = new OpenIEExtractor();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        extractor = null;
+    }
+
+    /**
+     * Runs the extractor over the bundled example page. The test passes when
+     * the extraction completes without throwing; it makes no assertion on the
+     * generated triples.
+     */
+    @Test
+    public void testExtractFromHTMLDocument()
+      throws IOException, ExtractionException, TripleHandlerException {
+        final IRI uri = RDFUtils.iri("http://podaac.jpl.nasa.gov/aquarius");
+        extract(uri, "/org/apache/any23/extractor/openie/example-openie.html");
+    }
+
+    /**
+     * Runs the extractor over a classpath resource and writes the triples as
+     * RDF/XML to a temporary file.
+     *
+     * @param uri the document IRI given to the extraction context.
+     * @param filePath classpath location of the document to process.
+     * @throws IOException if the temporary file or the resource cannot be read or written.
+     * @throws ExtractionException if the extractor reports a failure.
+     * @throws TripleHandlerException if the RDF/XML writer cannot be closed.
+     */
+    public void extract(IRI uri, String filePath)
+      throws IOException, ExtractionException, TripleHandlerException {
+      final File output = File.createTempFile("OpenIEExtractorTest", "tmp");
+      // Do not leave a new temporary file behind after every test run.
+      output.deleteOnExit();
+      // try-with-resources guarantees the stream is released even when extraction fails.
+      try (FileOutputStream fos = new FileOutputStream(output)) {
+        final TripleHandler tHandler = new RDFXMLWriter(fos);
+        final ExtractionContext extractionContext = new ExtractionContext("rdf-openie", uri);
+        final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
+        try {
+          extractor.run(
+                  ExtractionParameters.newDefault(),
+                  extractionContext,
+                  StreamUtils.inputStreamToDocument(this.getClass().getResourceAsStream(filePath)),
+                  result
+          );
+        } finally {
+          logger.debug("RDF/XML output written to {}", output);
+          try {
+            tHandler.close();
+          } finally {
+            // Close the result even if closing the handler failed.
+            result.close();
+          }
+        }
+      }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9f69936..df1059e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -204,10 +204,10 @@
     <module>encoding</module>
     <module>core</module>
     <module>cli</module>
-    <module>openie</module>
     <module>plugins/basic-crawler</module>
     <module>plugins/html-scraper</module>
     <module>plugins/office-scraper</module>
+    <module>plugins/openie</module>
     <module>plugins/integration-test</module>
     <module>service</module>
   </modules>
@@ -248,6 +248,8 @@
     <semargl.version>0.7</semargl.version>
     <slf4j.logger.version>1.7.25</slf4j.logger.version>
     <tika.version>1.17</tika.version>
+    <openie_2.11.version>4.2.6</openie_2.11.version>
+    <openregex.version>1.1.1</openregex.version>
 
     <!-- Overridden in profiles to add JDK specific arguments to surefire -->
     <surefire-extra-args />
@@ -270,7 +272,7 @@
     <buildnumber-maven-plugin.version>1.4</buildnumber-maven-plugin.version>
     <maven-compiler-plugin.version>3.6.1</maven-compiler-plugin.version>
     <maven-jar-plugin.version>3.0.2</maven-jar-plugin.version>
-    <maven-surefire-plugin.version>2.20</maven-surefire-plugin.version>
+    <maven-surefire-plugin.version>2.20.1</maven-surefire-plugin.version>
     <jacoco-maven-plugin.version>0.7.9</jacoco-maven-plugin.version>
     <maven-site-plugin.version>3.6</maven-site-plugin.version>
     <maven-changes-plugin.version>2.12.1</maven-changes-plugin.version>

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/service/README.md
----------------------------------------------------------------------
diff --git a/service/README.md b/service/README.md
new file mode 100644
index 0000000..0de9b8a
--- /dev/null
+++ b/service/README.md
@@ -0,0 +1,49 @@
+# Any23 Web Service
+
+This is the root dir of the Any23 Web-Service module.
+
+Apache Any23 provides a Web-Service that can be used to extract RDF from Web documents.
+
+## Generate Web-Service Packaging
+
+To generate the desired Web-service package, execute 'mvn package' from this directory.
+
+```
+$ cd $ANY23-HOME/service
+$ mvn package
+```
+From this directory it generates roughly the following...
+```
+.
+├── pom.xml
+├── README.md
+├── src
+│   ├── main
+│   │   ├── assembly
+│   │   ├── bin
+│   │   ├── java
+│   │   ├── resources
+│   │   └── webapp
+│   └── test
+│       ├── java
+│       └── resources
+└── target
+    ├── any23-service-${version}.war
+    ├── any23-service-${version}-without-deps.war
+    ├── apache-any23-service-${version}-bin-server-embedded.tar.gz <<<
+    ├── apache-any23-service-${version}-bin-server-embedded.zip <<<
+    ├── apache-any23-service-${version}-bin.tar.gz <<<
+    ├── apache-any23-service-${version}-bin-without-deps.tar.gz <<<
+    ├── apache-any23-service-${version}-bin-without-deps.zip <<<
+    ├── apache-any23-service-${version}-bin.zip <<<
+    ├── archive-tmp
+    ├── classes
+    ├── generated-sources
+    ├── maven-archiver
+    ├── maven-shared-archive-resources
+    ├── surefire
+    ├── surefire-reports
+    └── test-classes
+```
+
+Specific READMEs for each of the artifacts can be found inside the ./target/*.tar.gz and ./target/*.zip archives (annotated above with '<<<'), which contain much more detailed information.

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/service/README.txt
----------------------------------------------------------------------
diff --git a/service/README.txt b/service/README.txt
deleted file mode 100644
index a4d26d0..0000000
--- a/service/README.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-=============
-Any23 Web Service
-=============
-
-This is the root dir of the Any23 Web-Service module.
-
-Apache Any23 provides a Web-Service that can be used to extract RDF from Web documents.
-
-Generate Web-Service Packaging
-===============================
-
-To generate the desired Web-service package, execute 'mvn package' from this directory.
-
-$cd $ANY23-HOME/service
-$ mvn package
-
-From this directory it generates:
-.
-├── pom.xml
-├── README.txt
-├── src
-│   ├── main
-│   │   ├── assembly
-│   │   ├── bin
-│   │   ├── java
-│   │   ├── resources
-│   │   └── webapp
-│   └── test
-│       ├── java
-│       └── resources
-└── target
-    ├── any23-service-${version}.war
-    ├── any23-service-${version}-without-deps.war
-    ├── apache-any23-service-${version}-bin-server-embedded.tar.gz <<<
-    ├── apache-any23-service-${version}-bin-server-embedded.zip <<<
-    ├── apache-any23-service-${version}-bin.tar.gz <<<
-    ├── apache-any23-service-${version}-bin-without-deps.tar.gz <<<
-    ├── apache-any23-service-${version}-bin-without-deps.zip <<<
-    ├── apache-any23-service-${version}-bin.zip <<<
-    ├── archive-tmp
-    ├── classes
-    ├── generated-sources
-    ├── maven-archiver
-    ├── maven-shared-archive-resources
-    ├── surefire
-    ├── surefire-reports
-    └── test-classes
-...
-
-Specific README's can be found in either ./target/*.tar.gz || ./target/*.zip (annotated above with '<<<'), where much more detailed information sources can be located.

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/service/pom.xml
----------------------------------------------------------------------
diff --git a/service/pom.xml b/service/pom.xml
index fe4911f..d5b275f 100644
--- a/service/pom.xml
+++ b/service/pom.xml
@@ -34,15 +34,41 @@
   <properties>
     <!-- the following property is used in the bash script as well, don't remove it! -->
     <jetty.runner.version>8.1.16.v20140903</jetty.runner.version>
+    <output.directory>${project.build.directory}/${project.artifactId}-${project.version}/WEB-INF/lib/apache-any23-openie</output.directory>
   </properties>
 
   <dependencies>
-    <!-- Core Module -->
+    <!-- Any23 Modules -->
     <dependency>
       <groupId>org.apache.any23</groupId>
       <artifactId>apache-any23-core</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.any23.plugins</groupId>
+      <artifactId>apache-any23-openie</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>${openie_2.11.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>${openie_2.11.version}</version>
+      <scope>provided</scope>
+      <type>pom</type>
+    </dependency>
+    <dependency>
+      <groupId>edu.washington.cs.knowitall</groupId>
+      <artifactId>openregex</artifactId>
+      <version>${openregex.version}</version>
+      <scope>provided</scope>
+    </dependency>
 
     <!-- Logging -->
     <dependency>
@@ -181,6 +207,62 @@
           </descriptors>
         </configuration>
       </plugin>
+
+      <!-- Used to provide dynamic OpenIE toggling within service -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy</id>
+            <phase>prepare-package</phase>
+            <goals>
+              <goal>copy</goal>
+            </goals>
+            <configuration>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>org.apache.any23.plugins</groupId>
+                  <artifactId>apache-any23-openie</artifactId>
+                  <version>${project.version}</version>
+                  <outputDirectory>${output.directory}</outputDirectory>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>org.allenai.openie</groupId>
+                  <artifactId>openie_2.11</artifactId>
+                  <version>${openie_2.11.version}</version>
+                  <outputDirectory>${output.directory}</outputDirectory>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>org.allenai.openie</groupId>
+                  <artifactId>openie_2.11</artifactId>
+                  <version>${openie_2.11.version}</version>
+                  <type>pom</type>
+                  <outputDirectory>${output.directory}</outputDirectory>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>edu.washington.cs.knowitall</groupId>
+                  <artifactId>openregex</artifactId>
+                  <version>${openregex.version}</version>
+                  <outputDirectory>${output.directory}</outputDirectory>
+                </artifactItem>
+              </artifactItems>
+              <!-- other configurations here -->
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <classpathDependencyExcludes>
+            <classpathDependencyExclude>org.apache.any23.plugins:apache-any23-openie</classpathDependencyExclude>
+          </classpathDependencyExcludes>
+        </configuration>
+      </plugin>
+
     </plugins>
   </build>
 

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/service/src/main/java/org/apache/any23/servlet/Servlet.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index b63d052..1ab542c 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -20,6 +20,7 @@ package org.apache.any23.servlet;
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.http.HTTPClient;
+import org.apache.any23.plugin.Any23PluginManager;
 import org.apache.any23.servlet.conneg.Any23Negotiator;
 import org.apache.any23.servlet.conneg.MediaRangeSpec;
 import org.apache.any23.source.ByteArrayDocumentSource;
@@ -35,6 +36,8 @@ import javax.servlet.ServletException;
 import javax.servlet.http.HttpServlet;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
+
+import java.io.File;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.util.regex.Pattern;
@@ -69,17 +72,43 @@ public class Servlet extends HttpServlet {
         final String format = getFormatFromRequestOrNegotiation(req);
         final boolean report = isReport(req);
         final boolean annotate = isAnnotated(req);
+        final boolean openie = isOpenIE(req);
         if (format == null) {
-            responder.sendError(406, "Client accept header does not include a supported output format", report);
-            return;
+            try {
+                responder.sendError(406, "Client accept header does not include a supported output format", report);
+                return;
+            } catch (IOException e) {
+                LOG.error("Unable to send error for null request format.", e);
+            }
         }
         final String uri = getInputIRIFromRequest(req);
         if (uri == null) {
-            responder.sendError(404, "Missing IRI in GET request. Try /format/http://example.com/myfile", report);
-            return;
+            try {
+                responder.sendError(404, "Missing IRI in GET request. Try /format/http://example.com/myfile", report);
+                return;
+            } catch (Exception e) {
+                LOG.error("Unable to send error for null request IRI.", e);
+            }
+        }
+        if (openie) {
+            Any23PluginManager pManager = Any23PluginManager.getInstance();
+            //Dynamically adding JARs to the classpath via the following logic
+            //is absolutely dependent on the 'apache-any23-openie' directory being
+            //present within the webapp /lib directory. This is specified within
+            //the maven-dependency-plugin.
+            File webappClasspath = new File(getClass().getClassLoader().getResource("").getPath());
+            File openIEJarPath = new File(webappClasspath.getParentFile().getPath() + "/lib/apache-any23-openie");
+            boolean loadedJars = pManager.loadJARDir(openIEJarPath);
+            if (loadedJars) {
+                LOG.info("Successful dynamic classloading of apache-any23-openie directory from webapp lib.");
+            }
         }
         final ExtractionParameters eps = getExtractionParameters(req);
-        responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
+        try {
+            responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
+        } catch (IOException e) {
+            LOG.error("Unable to run extraction on HTTPDocumentSource.", e);
+        }
     }
 
     @Override
@@ -87,6 +116,7 @@ public class Servlet extends HttpServlet {
         final WebResponder responder = new WebResponder(this, resp);
         final boolean report = isReport(req);
         final boolean annotate = isAnnotated(req);
+        final boolean openie = isOpenIE(req);
         if (req.getContentType() == null) {
             responder.sendError(400, "Invalid POST request, no Content-Type for the message body specified", report);
             return;
@@ -97,6 +127,10 @@ public class Servlet extends HttpServlet {
             responder.sendError(406, "Client accept header does not include a supported output format", report);
             return;
         }
+        if (openie) {
+          Any23PluginManager pManager = Any23PluginManager.getInstance();
+          pManager.loadJARDir(new File(getClass().getResource("apache-any23-openie").getPath()));
+        }
         final ExtractionParameters eps = getExtractionParameters(req);
         if ("application/x-www-form-urlencoded".equals(getContentTypeHeader(req))) {
             if (uri != null) {
@@ -283,4 +317,8 @@ public class Servlet extends HttpServlet {
         return request.getParameter("annotate") != null;
     }
 
+    private boolean isOpenIE(HttpServletRequest request) {
+      return request.getParameter("openie") != null;
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/service/src/main/resources/form.html
----------------------------------------------------------------------
diff --git a/service/src/main/resources/form.html b/service/src/main/resources/form.html
index 374d017..a5cf937 100644
--- a/service/src/main/resources/form.html
+++ b/service/src/main/resources/form.html
@@ -80,6 +80,15 @@ function showModal( id )
         </div>
       </div>
       <div class="control-group">
+        <label class="control-label" for="url-openie-get">OpenIE</label>
+          <div class="controls">
+           <label class="checkbox">
+             <input id="url-openie-get" type="checkbox" name="openie">
+             <a href="javascript:showModal( '#sPopup-openie' );">[?]</a>
+          </label>
+        </div>
+      </div>
+      <div class="control-group">
         <label class="control-label" for="url-validation-get">Validation</label>
         <div class="controls">
           <select id="url-validation-get" name="validation-mode" onclick="if(document.getElementById('url-validation-get').value.indexOf('validate') == 0) { document.getElementById('url-report-get').checked = true; }">
@@ -103,7 +112,7 @@ function showModal( id )
         <label class="control-label" for="url-annotate-get">Annotate</label>
           <div class="controls">
            <label class="checkbox">
-             <input id="url-annotate-get" type="checkbox" type="checkbox" name="annotate">
+             <input id="url-annotate-get" type="checkbox" name="annotate">
              <a href="javascript:showModal( '#sPopup-annotate' );">[?]</a>
           </label>
         </div>
@@ -149,6 +158,15 @@ function showModal( id )
         </div>
       </div>
       <div class="control-group">
+        <label class="control-label" for="url-openie-post">OpenIE</label>
+          <div class="controls">
+           <label class="checkbox">
+             <input id="url-openie-post" type="checkbox" name="openie">
+             <a href="javascript:showModal( '#sPopup-openie' );">[?]</a>
+          </label>
+        </div>
+      </div>
+      <div class="control-group">
         <label class="control-label" for="url-validation-post">Validation</label>
         <div class="controls">
           <select id="url-validation-post" name="validation-mode" onclick="if(document.getElementById('url-validation-post').value.indexOf('validate') == 0) { document.getElementById('url-report-post').checked = true; }">
@@ -172,7 +190,7 @@ function showModal( id )
         <label class="control-label" for="url-annotate-post">Annotate</label>
           <div class="controls">
            <label class="checkbox">
-             <input id="url-annotate-post" type="checkbox" type="checkbox" name="annotate">
+             <input id="url-annotate-post" type="checkbox" name="annotate">
              <a href="javascript:showModal( '#sPopup-annotate' );">[?]</a>
           </label>
         </div>
@@ -224,8 +242,10 @@ function showModal( id )
       </tr>
       <tr><th>annotate</th><td>If specified the output RDF will contain extractor specific scope comments.<br/>Possible values: <code>on</code>/<code>off</code></td></tr>
       <tr><th>report</th><td>If specified will produce a full XML report containing extraction and validation issues other than produced metadata.<br/>Possible values: <code>on</code>/<code>off</code></td></tr>
+      <tr><th>openie</th><td>If specified the <a href="https://github.com/allenai/openie-standalone" target="_blank">
+      Open Information Extraction (Open IE) system</a> will be activated (default off).<br/>Possible values: <code>on</code>/<code>off</code></td></tr>
     </table>
-    Such URL will return an HTTP <i>302</i> redirect to <code><span class="app-base-uri">http://...</span>any23/<em>format</em></code>.<br/>
+    Formatting the URL according to the above will return an HTTP <i>302</i> redirect to <code><span class="app-base-uri">http://...</span>any23/<em>format</em></code>.<br/>
     <p>The response is the input document converted to the desired output format.</p>
 
     <h3>Direct POST API</h3>
@@ -278,6 +298,8 @@ Content-Length: 174
       </tr>
       <tr><th>annotate</th><td>If specified the output RDF will contain extractor specific scope comments.<br/>Possible values: <code>on</code>/<code>off</code></td></tr>
       <tr><th>report</th><td>If specified will produce a full XML report containing extraction and validation issues other than produced metadata.<br/>Possible values: <code>on</code>/<code>off</code></td></tr>
+      <tr><th>openie</th><td>If specified the <a href="https://github.com/allenai/openie-standalone" target="_blank">
+      Open Information Extraction (Open IE) system</a> will be activated (default off).<br/>Possible values: <code>on</code>/<code>off</code></td></tr>
     </table>
 
     <h3>Output formats</h3>
@@ -285,11 +307,11 @@ Content-Length: 174
     <ul>
       <li><code>best</code> for content negotiation according to the client's <code>Accept</code> HTTP header</li>
       <li><code>turtle</code>, <code>ttl</code>, <code>n3</code> for
-        <a href="http://www.w3.org/TeamSubmission/turtle/" target="_blank">Turtle</a>/<a href="http://www.w3.org/DesignIssues/Notation3" target="_blank">N3</a></li>
+        <a href="https://www.w3.org/TR/turtle/" target="_blank">Turtle</a>/<a href="https://www.w3.org/TeamSubmission/n3/" target="_blank">N3</a></li>
       <li><code>ntriples</code>, <code>nt</code> for
-        <a href="http://www.w3.org/TR/rdf-testcases/#ntriples" target="_blank">N-Triples</a></li>
+        <a href="https://www.w3.org/TR/n-triples/" target="_blank">N-Triples</a></li>
       <li><code>nquads</code>, <code>nq</code> for
-        <a href="http://sw.deri.org/2008/07/n-quads/" target="_blank">N-Quads</a></li>
+        <a href="https://www.w3.org/TR/n-quads/" target="_blank">N-Quads</a></li>
       <li><code>trix</code> for
         <a href="http://www.w3.org/2004/03/trix/" target="_blank">TriX</a></li>
       <li><code>rdfxml</code>, <code>rdf</code>, <code>xml</code> for
@@ -323,6 +345,27 @@ Content-Length: 174
     <p><b>Apache Any23 v.${project.version} (${implementation.build.tstamp})</b></p>
     <p><a href="http://any23.apache.org/" target="_blank">Any23 project homepage</a> | Hosted at <a href="http://apache.org/" target="_blank">Apache Software Foundation</a></p>
 
+    <div id="sPopup-openie" class="modal hide fade">
+      <div class="modal-header">
+        <button type="button" class="close">×</button>
+        <h3>Open Information Extraction</h3>
+      </div>
+      <div class="modal-body">
+        <p>
+        If the <i>OpenIE</i> checkbox is selected, the <b>Any23</b> service will activate the
+        <a href="https://github.com/allenai/openie-standalone" target="_blank">Open Information Extraction (Open IE) system</a>, 
+        enhancing extraction results.</p>
+        <p>The Open IE system runs over sentences and creates extractions that represent relations in text; in the case
+        of Any23, this results in triples. The confidence of relationships extracted from text is based on a
+        configurable threshold established in 
+        <code>https://github.com/apache/any23/blob/master/api/src/main/resources/default-configuration.properties</code>.
+        </p>
+      </div>
+      <div class="modal-footer">
+        <a href="#" class="btn">Close</a>
+      </div>
+    </div>
+
     <div id="sPopup-fix" class="modal hide fade">
       <div class="modal-header">
         <button type="button" class="close" >×</button>
@@ -330,8 +373,8 @@ Content-Length: 174
       </div>
       <div class="modal-body">
         <p> 
-          The <b>Any23</b> service tries to fix some <a href="http://www.deri.ie/fileadmin/documents/DERI-TR-2009-07-28.pdf" target="_blank">common issues</a>
-          before performing a metadata extraction. The fixing is performed according a set of fully customizable rules. 
+          The <b>Any23</b> service tries to fix some common issues before performing a metadata 
+          extraction. The fixing is performed according to a set of fully customizable rules.
         </p>
         <p>
           The following <i>Validation</i> options are available.

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/service/src/main/webapp/resources/js/bootstrap-modal.js
----------------------------------------------------------------------
diff --git a/service/src/main/webapp/resources/js/bootstrap-modal.js b/service/src/main/webapp/resources/js/bootstrap-modal.js
index 38fd0c8..11b951e 100644
--- a/service/src/main/webapp/resources/js/bootstrap-modal.js
+++ b/service/src/main/webapp/resources/js/bootstrap-modal.js
@@ -17,11 +17,9 @@
  * limitations under the License.
  * ========================================================= */
 
-
 !function ($) {
 
-  "use strict"; // jshint ;_;
-
+  "use strict";
 
  /* MODAL CLASS DEFINITION
   * ====================== */
@@ -46,7 +44,8 @@
 
         this.$element.trigger(e)
 
-        if (this.isShown || e.isDefaultPrevented()) return
+        if (this.isShown || e.isDefaultPrevented())
+          return
 
         $('body').addClass('modal-open')
 
@@ -85,7 +84,8 @@
 
         this.$element.trigger(e)
 
-        if (!this.isShown || e.isDefaultPrevented()) return
+        if (!this.isShown || e.isDefaultPrevented())
+          return
 
         this.isShown = false
 
@@ -141,7 +141,8 @@
         this.$backdrop.click($.proxy(this.hide, this))
       }
 
-      if (doAnimate) this.$backdrop[0].offsetWidth // force reflow
+      if (doAnimate)
+        this.$backdrop[0].offsetWidth // force reflow
 
       this.$backdrop.addClass('in')
 
@@ -186,9 +187,12 @@
       var $this = $(this)
         , data = $this.data('modal')
         , options = $.extend({}, $.fn.modal.defaults, $this.data(), typeof option == 'object' && option)
-      if (!data) $this.data('modal', (data = new Modal(this, options)))
-      if (typeof option == 'string') data[option]()
-      else if (options.show) data.show()
+      if (!data)
+        $this.data('modal', (data = new Modal(this, options)))
+      if (typeof option == 'string')
+        data[option]()
+      else if (options.show)
+        data.show()
     })
   }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/706e891c/src/site/apt/any23-plugins.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/any23-plugins.apt b/src/site/apt/any23-plugins.apt
index f429e2d..b79a27a 100644
--- a/src/site/apt/any23-plugins.apt
+++ b/src/site/apt/any23-plugins.apt
@@ -49,11 +49,10 @@ export CLASSPATH_PREFIX=../../../plugins/basic-crawler/target/any23-basic-crawle
 
    * adding its <JAR> to the <$HOME/.any23/plugins> directory.
 
-   A plugin can be added to the <Apache Any23 library API> by using the
-   {{{./apidocs/org/apache/any23/plugin/Any23PluginManager.html}Any23PluginManager}}#createInstance(Configuration configuration, File... pluginLocations)
-   method.
-
-   TODO: plugin support in Apache Any23 Service
+   A plugin can be added to the <Apache Any23 library API> by first creating a static instance of
+   {{{./apidocs/org/apache/any23/plugin/Any23PluginManager.html}Any23PluginManager}}#getInstance().
+   Once this is done there is a variety of options to configure and register plugins, etc. An example
+   of dynamic plugin loading can be seen via the OpenIE toggle in the Any23 Service.
 
     Any implementation of <ExtractorPlugin> will be automatically registered to the
     {{{./apidocs/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.


[04/10] any23 git commit: Merge branch 'master' into ANy23-321

Posted by le...@apache.org.
Merge branch 'master' into ANy23-321


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/4f40fe02
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/4f40fe02
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/4f40fe02

Branch: refs/heads/master
Commit: 4f40fe0214ecaabb3c3d20cb44d425b196d0f782
Parents: 6660ed8 f36c5e1
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Jan 8 09:42:26 2018 -0500
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Jan 8 09:42:26 2018 -0500

----------------------------------------------------------------------
 src/site/site.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[08/10] any23 git commit: ANY23-321 Add openie toggle functionality to service

Posted by le...@apache.org.
ANY23-321 Add openie toggle functionality to service


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/b3806d3c
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/b3806d3c
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/b3806d3c

Branch: refs/heads/master
Commit: b3806d3c86422831fc08ac9068fac5984b772399
Parents: 69109f3
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Fri Feb 23 17:56:20 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Fri Feb 23 17:56:20 2018 -0800

----------------------------------------------------------------------
 service/pom.xml | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/b3806d3c/service/pom.xml
----------------------------------------------------------------------
diff --git a/service/pom.xml b/service/pom.xml
index d5b275f..b59706b 100644
--- a/service/pom.xml
+++ b/service/pom.xml
@@ -217,37 +217,11 @@
             <id>copy</id>
             <phase>prepare-package</phase>
             <goals>
-              <goal>copy</goal>
+              <goal>copy-dependencies</goal>
             </goals>
             <configuration>
-              <artifactItems>
-                <artifactItem>
-                  <groupId>org.apache.any23.plugins</groupId>
-                  <artifactId>apache-any23-openie</artifactId>
-                  <version>${project.version}</version>
-                  <outputDirectory>${output.directory}</outputDirectory>
-                </artifactItem>
-                <artifactItem>
-                  <groupId>org.allenai.openie</groupId>
-                  <artifactId>openie_2.11</artifactId>
-                  <version>${openie_2.11.version}</version>
-                  <outputDirectory>${output.directory}</outputDirectory>
-                </artifactItem>
-                <artifactItem>
-                  <groupId>org.allenai.openie</groupId>
-                  <artifactId>openie_2.11</artifactId>
-                  <version>${openie_2.11.version}</version>
-                  <type>pom</type>
-                  <outputDirectory>${output.directory}</outputDirectory>
-                </artifactItem>
-                <artifactItem>
-                  <groupId>edu.washington.cs.knowitall</groupId>
-                  <artifactId>openregex</artifactId>
-                  <version>${openregex.version}</version>
-                  <outputDirectory>${output.directory}</outputDirectory>
-                </artifactItem>
-              </artifactItems>
-              <!-- other configurations here -->
+              <includeScope>provided</includeScope>
+              <outputDirectory>${output.directory}</outputDirectory>
             </configuration>
           </execution>
         </executions>


[05/10] any23 git commit: ANY23-321 Add openie toggle functionality to service

Posted by le...@apache.org.
ANY23-321 Add openie toggle functionality to service


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/63ffc9e3
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/63ffc9e3
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/63ffc9e3

Branch: refs/heads/master
Commit: 63ffc9e3e8a8da0b4af7ca5b227f1e199e545227
Parents: 4f40fe0 482e780
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Fri Feb 2 21:55:12 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Fri Feb 2 21:55:12 2018 -0800

----------------------------------------------------------------------
 LICENSE-header.txt                              |  41 --
 NOTICE.txt                                      |   2 +-
 RELEASE-NOTES.txt                               |  66 ++
 api/pom.xml                                     |   2 +-
 .../main/java/org/apache/any23/vocab/YAML.java  |  13 +-
 .../resources/default-configuration.properties  |   4 +
 cli/pom.xml                                     |   2 +-
 .../org/apache/any23/cli/SimpleRoverTest.java   |  13 +-
 .../org/apache/any23/cli/YAMLRoverTest.java     |  13 +-
 cli/src/test/resources/log4j.properties         |  15 +-
 core/pom.xml                                    |   6 +-
 .../extractor/html/EmbeddedJSONLDExtractor.java |   8 +-
 .../any23/extractor/html/HCardExtractor.java    |   3 +-
 .../any23/extractor/html/HTMLMetaExtractor.java |   6 +-
 .../apache/any23/extractor/html/JsoupUtils.java | 103 ++++
 .../any23/extractor/html/TagSoupParser.java     | 172 +++---
 .../html/TagSoupParsingConfiguration.java       | 181 ++++++
 .../any23/extractor/rdf/BaseRDFExtractor.java   |  46 +-
 .../html/EmbeddedJSONLDExtractorTest.java       |  14 +
 .../microdata/MicrodataParserTest.java          |   5 +-
 .../extractor/rdfa/RDFa11ExtractorTest.java     |  41 +-
 csvutils/pom.xml                                |   2 +-
 encoding/pom.xml                                |   2 +-
 mime/pom.xml                                    |   2 +-
 plugins/basic-crawler/pom.xml                   |  10 +-
 plugins/html-scraper/pom.xml                    |   4 +-
 plugins/integration-test/pom.xml                |   2 +-
 plugins/office-scraper/pom.xml                  |   4 +-
 plugins/openie/pom.xml                          |   2 +-
 pom.xml                                         |  14 +-
 service/pom.xml                                 |   2 +-
 .../main/assembly/NOTICE-server-embedded.txt    |   2 +-
 service/src/main/assembly/NOTICE-with-deps.txt  |   2 +-
 .../src/main/assembly/NOTICE-without-deps.txt   |   2 +-
 test-resources/pom.xml                          |   2 +-
 .../html-body-embedded-jsonld-extractor.html    |  37 ++
 ...head-and-body-embedded-jsonld-extractor.html |  47 ++
 .../test/resources/html/rdfa/rdfa-issue227.html |  40 ++
 .../html/rdfa/rdfa-issue268-and-317.html        | 613 +++++++++++++++++++
 .../html/rdfa/rdfa-issue271-and-317.html        | 251 ++++++++
 .../html/rdfa/rdfa-issue273-and-317.html        | 143 +++++
 .../html/rdfa/rdfa-issue326-and-267.html        |  20 +
 42 files changed, 1777 insertions(+), 182 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/63ffc9e3/plugins/integration-test/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/63ffc9e3/plugins/openie/pom.xml
----------------------------------------------------------------------
diff --cc plugins/openie/pom.xml
index 64c6806,0000000..0f34ad5
mode 100644,000000..100644
--- a/plugins/openie/pom.xml
+++ b/plugins/openie/pom.xml
@@@ -1,165 -1,0 +1,165 @@@
 +<?xml version="1.0" encoding="UTF-8"?>
 +<!--
 +  Licensed to the Apache Software Foundation (ASF) under one or more
 +  contributor license agreements.  See the NOTICE file distributed with
 +  this work for additional information regarding copyright ownership.
 +  The ASF licenses this file to You under the Apache License, Version 2.0
 +  (the "License"); you may not use this file except in compliance with
 +  the License.  You may obtain a copy of the License at
 +
 +   http://www.apache.org/licenses/LICENSE-2.0
 +
 +  Unless required by applicable law or agreed to in writing, software
 +  distributed under the License is distributed on an "AS IS" BASIS,
 +  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 +  See the License for the specific language governing permissions and
 +  limitations under the License.
 +-->
 +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 +  <modelVersion>4.0.0</modelVersion>
 +
 +  <parent>
 +    <groupId>org.apache.any23</groupId>
 +    <artifactId>apache-any23</artifactId>
-     <version>2.2-SNAPSHOT</version>
++    <version>2.3-SNAPSHOT</version>
 +    <relativePath>../../pom.xml</relativePath>
 +  </parent>
 +
 +  <groupId>org.apache.any23.plugins</groupId>
 +  <artifactId>apache-any23-openie</artifactId>
 +
 +  <name>Apache Any23 :: Plugins :: OpenIE</name>
 +  <description>Open Information Extraction module.</description>
 +
 +  <repositories>
 +    <repository>
 +      <snapshots>
 +        <enabled>false</enabled>
 +      </snapshots>
 +      <id>bintray-allenai-maven</id>
 +      <name>bintray</name>
 +      <url>http://allenai.bintray.com/maven</url>
 +    </repository>
 +  </repositories>
 +  <pluginRepositories>
 +    <pluginRepository>
 +      <snapshots>
 +        <enabled>false</enabled>
 +      </snapshots>
 +      <id>bintray-allenai-maven</id>
 +      <name>bintray-plugins</name>
 +      <url>http://allenai.bintray.com/maven</url>
 +    </pluginRepository>
 +  </pluginRepositories>
 +
 +  <dependencies>
 +    <dependency>
 +      <groupId>org.apache.any23</groupId>
 +      <artifactId>apache-any23-core</artifactId>
 +      <version>${project.version}</version>
 +      <scope>provided</scope>
 +    </dependency>
 +    <dependency>
 +      <groupId>org.apache.any23</groupId>
 +      <artifactId>apache-any23-test-resources</artifactId>
 +      <version>${project.version}</version>
 +      <scope>test</scope>
 +      <type>test-jar</type>
 +    </dependency>
 +    <dependency>
 +      <groupId>org.allenai.openie</groupId>
 +      <artifactId>openie_2.11</artifactId>
 +      <version>${openie_2.11.version}</version>
 +      <scope>compile</scope>
 +    </dependency>
 +    <dependency>
 +      <groupId>org.allenai.openie</groupId>
 +      <artifactId>openie_2.11</artifactId>
 +      <version>${openie_2.11.version}</version>
 +      <scope>compile</scope>
 +      <type>pom</type>
 +    </dependency>
 +    <dependency>
 +      <groupId>edu.washington.cs.knowitall</groupId>
 +      <artifactId>openregex</artifactId>
 +      <version>${openregex.version}</version>
 +      <scope>runtime</scope>
 +    </dependency>
 +    <dependency>
 +      <groupId>junit</groupId>
 +      <artifactId>junit</artifactId>
 +      <scope>test</scope>
 +    </dependency>
 +    <dependency>
 +      <groupId>org.slf4j</groupId>
 +      <artifactId>slf4j-log4j12</artifactId>
 +      <scope>test</scope>
 +    </dependency>
 +  </dependencies>
 +
 +  <build>
 +    <plugins>
 +      <plugin>
 +        <groupId>org.apache.maven.plugins</groupId>
 +        <artifactId>maven-surefire-plugin</artifactId>
 +        <configuration>
 +          <skipTests>true</skipTests>
 +        </configuration>
 +      </plugin>
 +      <!-- Generates the distribution package -->
 +      <plugin>
 +        <groupId>org.apache.maven.plugins</groupId>
 +        <artifactId>maven-assembly-plugin</artifactId>
 +        <configuration>
 +          <appendAssemblyId>false</appendAssemblyId>
 +          <descriptors>
 +            <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
 +          </descriptors>
 +        </configuration>
 +      </plugin>
 +    </plugins>
 +    <pluginManagement>
 +      <plugins>
 +        <plugin>
 +          <groupId>org.apache.maven.plugins</groupId>
 +          <artifactId>maven-assembly-plugin</artifactId>
 +          <version>${maven-assembly-plugin.version}</version>
 +          <executions>
 +            <execution>
 +              <id>assembly</id>
 +              <phase>package</phase>
 +              <goals>
 +                <goal>single</goal>
 +              </goals>
 +            </execution>
 +          </executions>
 +          <configuration>
 +            <attach>true</attach>
 +            <skipAssembly>true</skipAssembly>
 +            <tarLongFileMode>gnu</tarLongFileMode>
 +          </configuration>
 +        </plugin>
 +      </plugins>
 +    </pluginManagement>
 +  </build>
 +
 +  <profiles>
 +    <profile>
 +      <id>release</id>
 +      <build>
 +        <resources>
 +          <resource>
 +            <directory>${basedir}/../</directory>
 +            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
 +            <includes>
 +              <include>LICENSE.txt</include>
 +              <include>NOTICE.txt</include>
 +            </includes>
 +          </resource>
 +        </resources>
 +      </build>
 +    </profile>
 +
 +  </profiles>
 +
 +</project>

http://git-wip-us.apache.org/repos/asf/any23/blob/63ffc9e3/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/63ffc9e3/service/pom.xml
----------------------------------------------------------------------


[02/10] any23 git commit: Merge branch 'master' into ANY23-321

Posted by le...@apache.org.
Merge branch 'master' into ANY23-321


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/b71142f1
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/b71142f1
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/b71142f1

Branch: refs/heads/master
Commit: b71142f14179d0feee53dde32f570e98fa9edbdc
Parents: 706e891 d2ace9c
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jan 3 00:16:07 2018 +0000
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jan 3 00:16:07 2018 +0000

----------------------------------------------------------------------
 api/src/main/java/org/apache/any23/vocab/YAML.java        |  7 +++++--
 .../apache/any23/extractor/yaml/ElementsProcessor.java    |  6 ++----
 core/src/main/java/org/apache/any23/rdf/RDFUtils.java     | 10 ++++------
 .../any23/extractor/yaml/ElementsProcessorTest.java       |  1 -
 .../apache/any23/extractor/yaml/YAMLExtractorTest.java    |  7 +------
 .../java/org/apache/any23/vocab/RDFSchemaUtilsTest.java   |  4 ++--
 .../org/apache/any23/extractor/yaml/test-null.yml         |  4 +++-
 7 files changed, 17 insertions(+), 22 deletions(-)
----------------------------------------------------------------------



[07/10] any23 git commit: ANY23-321 Add openie toggle functionality to service

Posted by le...@apache.org.
ANY23-321 Add openie toggle functionality to service


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/69109f36
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/69109f36
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/69109f36

Branch: refs/heads/master
Commit: 69109f36a2ee279c5f031423e16af6b49ea8dbfd
Parents: 073190b
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Fri Feb 23 09:58:54 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Fri Feb 23 09:58:54 2018 -0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/any23/servlet/Servlet.java   | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/69109f36/service/src/main/java/org/apache/any23/servlet/Servlet.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index 1ab542c..154f41d 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -19,6 +19,8 @@ package org.apache.any23.servlet;
 
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractorRegistry;
+import org.apache.any23.extractor.ExtractorRegistryImpl;
 import org.apache.any23.http.HTTPClient;
 import org.apache.any23.plugin.Any23PluginManager;
 import org.apache.any23.servlet.conneg.Any23Negotiator;
@@ -100,7 +102,13 @@ public class Servlet extends HttpServlet {
             File openIEJarPath = new File(webappClasspath.getParentFile().getPath() + "/lib/apache-any23-openie");
             boolean loadedJars = pManager.loadJARDir(openIEJarPath);
             if (loadedJars) {
-                LOG.info("Successful dynamic classloading of apache-any23-openie directory from webapp lib.");
+                ExtractorRegistry r = ExtractorRegistryImpl.getInstance();
+                try {
+                    pManager.getExtractors().forEachRemaining(r::register);
+                } catch (IOException e) {
+                    LOG.error("Error during dynamic classloading of JARs from OpenIE runtime directory {}", openIEJarPath.toString(), e);
+                }
+                LOG.info("Successful dynamic classloading of JARs from OpenIE runtime directory {}", openIEJarPath.toString());
             }
         }
         final ExtractionParameters eps = getExtractionParameters(req);


[09/10] any23 git commit: ANY23-321 Add openie toggle functionality to service

Posted by le...@apache.org.
ANY23-321 Add openie toggle functionality to service


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/71bf171a
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/71bf171a
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/71bf171a

Branch: refs/heads/master
Commit: 71bf171a3b64fbb0388373aabad645e812bf3a0f
Parents: b3806d3
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Tue Feb 27 10:11:57 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Tue Feb 27 10:11:57 2018 -0800

----------------------------------------------------------------------
 .../plugin/extractor/openie/OpenIEExtractor.java  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/71bf171a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
index 1b6a9cf..d02b5a2 100644
--- a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
+++ b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
@@ -23,13 +23,12 @@ import javax.xml.transform.TransformerConfigurationException;
 import javax.xml.transform.TransformerFactoryConfigurationError;
 
 import org.apache.any23.extractor.Extractor;
+import org.apache.any23.extractor.IssueReport;
 import org.apache.any23.configuration.Configuration;
 import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.plugin.Author;
-import org.apache.any23.plugin.ExtractorPlugin;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.util.StreamUtils;
 import org.apache.tika.Tika;
@@ -63,7 +62,7 @@ import scala.collection.Seq;
  * sentences representing relations in the text.
  */
 @Author(name="Lewis John McGibbney (lewismc@apache.org)")
-public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor, ExtractorPlugin {
+public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
 
     private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
 
@@ -106,7 +105,13 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor, Extractor
             LOG.error("Encountered error during OpenIE extraction.", e);
         } catch (TikaException e) {
             LOG.error("Encountered error whilst parsing InputStream with Tika.", e);
-        }
+        } catch (OutOfMemoryError e) {
+          //let the gc do its thing
+          openIE = null;
+          out.notifyIssue(IssueReport.IssueLevel.FATAL, "Not enough memory available to perform OpenIE extraction.", -1, -1);
+          LOG.error("Encountered OutOfMemoryError... increase JVM heap when running OpenIEExtractor.", e);
+          return;
+      }
 
         List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);
         // for each extraction instance we can obtain a number of extraction elements
@@ -129,9 +134,4 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor, Extractor
             }
         }
     }
-
-    @Override
-    public ExtractorFactory<?> getExtractorFactory() {
-      return (ExtractorFactory<?>) OpenIEExtractorFactory.getDescriptionInstance();
-    }
 }


[03/10] any23 git commit: Merge branch 'master' into ANY23-321

Posted by le...@apache.org.
Merge branch 'master' into ANY23-321


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6660ed81
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6660ed81
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6660ed81

Branch: refs/heads/master
Commit: 6660ed8142c57274ce51b40af097e4cabf158d3b
Parents: b71142f 6d0606f
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Jan 8 09:26:05 2018 -0500
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Jan 8 09:26:05 2018 -0500

----------------------------------------------------------------------
 .../any23/cli/ExtractorDocumentationTest.java   |    2 -
 .../java/org/apache/any23/cli/RoverTest.java    |    2 -
 .../org/apache/any23/cli/SimpleRoverTest.java   |    2 -
 .../any23/extractor/csv/CSVExtractor.java       |   23 +-
 .../extractor/html/EmbeddedJSONLDExtractor.java |  363 ++--
 .../any23/extractor/html/HTMLMetaExtractor.java |   58 +-
 .../apache/any23/extractor/microdata/Item.java  |   10 +-
 .../extractor/microdata/ItemPropValue.java      |   31 +-
 .../any23/extractor/microdata/ItemScope.java    |   29 +-
 .../extractor/microdata/MicrodataExtractor.java |   35 +-
 .../extractor/microdata/MicrodataParser.java    |  136 +-
 .../any23/extractor/xpath/QuadTemplate.java     |    1 +
 .../any23/extractor/xpath/TemplateObject.java   |   39 +-
 .../any23/extractor/xpath/TemplateSubject.java  |   13 +-
 .../any23/extractor/yaml/ElementsProcessor.java |   24 +-
 .../any23/rdf/Any23ValueFactoryWrapper.java     |   61 +-
 .../java/org/apache/any23/rdf/RDFUtils.java     |   92 +-
 .../XMLValidationReportSerializer.java          |   21 +-
 .../any23/validator/rule/AboutNotURIRule.java   |    1 +
 .../validator/rule/MetaNameMisuseRule.java      |    1 +
 .../org/apache/any23/vocab/RDFSchemaUtils.java  |   24 +-
 .../any23/extractor/csv/CSVExtractorTest.java   |  178 +-
 .../html/AbstractExtractorTestCase.java         | 1592 ++++++++-------
 .../extractor/html/HCardExtractorTest.java      | 1852 +++++++++---------
 .../extractor/html/HListingExtractorTest.java   |    3 -
 .../microdata/MicrodataParserTest.java          |   81 +-
 .../any23/extractor/rdfa/RDFaExtractorTest.java |    2 -
 .../TemplateXPathExtractorRuleImplTest.java     |   24 +-
 .../any23/filter/IgnoreAccidentalRDFaTest.java  |    2 +-
 .../org/apache/any23/writer/JSONWriterTest.java |    4 -
 pom.xml                                         |    2 +-
 .../org/apache/any23/servlet/ServletTest.java   |    4 -
 ....2.1-non-normative-example-1-expected.nquads |    8 +-
 .../5.2.1-non-normative-example-1.html          |   48 +-
 ....2.1-non-normative-example-2-expected.nquads |   33 +-
 .../5.2.1-non-normative-example-2.html          |   16 +-
 .../microdata-basic-expected.properties         |    6 +-
 .../resources/microdata/microdata-basic.html    |   15 +-
 .../microdata-itemref-expected.properties       |   20 +-
 .../resources/microdata/microdata-itemref.html  |   46 +-
 .../microdata/microdata-json-serialization.json |    2 +-
 .../microdata/microdata-nested-expected.nquads  |   19 +-
 .../microdata-nested-expected.properties        |    4 +-
 .../resources/microdata/microdata-nested.html   |   33 +-
 .../microdata-richsnippet-expected.nquads       |   27 +-
 45 files changed, 2469 insertions(+), 2520 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/6660ed81/pom.xml
----------------------------------------------------------------------


[10/10] any23 git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/any23

Posted by le...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/any23


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/394d36a0
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/394d36a0
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/394d36a0

Branch: refs/heads/master
Commit: 394d36a0c15b4a5d07e808603ba108d92ff4df1c
Parents: 71bf171 66ce124
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Tue Feb 27 20:43:05 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Tue Feb 27 20:43:05 2018 -0800

----------------------------------------------------------------------
 pom.xml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/394d36a0/pom.xml
----------------------------------------------------------------------


[06/10] any23 git commit: Merge into master

Posted by le...@apache.org.
Merge into master


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/073190bd
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/073190bd
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/073190bd

Branch: refs/heads/master
Commit: 073190bd7cb948ce1faf5df7dae61eb8257416ce
Parents: 778d05e 63ffc9e
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Fri Feb 23 09:23:10 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Fri Feb 23 09:23:10 2018 -0800

----------------------------------------------------------------------
 .../apache/any23/extractor/ExtractorGroup.java  |   1 +
 .../apache/any23/plugin/Any23PluginManager.java |  23 +--
 core/src/main/java/org/apache/any23/Any23.java  |   8 +-
 .../any23/extractor/ExtractorRegistryImpl.java  |  11 +-
 openie/pom.xml                                  | 152 -----------------
 .../any23/extractor/openie/OpenIEExtractor.java | 130 ---------------
 .../openie/OpenIEExtractorFactory.java          |  52 ------
 .../org.apache.any23.extractor.ExtractorFactory |   1 -
 .../any23/openie/OpenIEExtractorTest.java       |  88 ----------
 .../htmlscraper/HTMLScraperExtractor.java       |  12 +-
 plugins/integration-test/pom.xml                |   5 +
 .../java/org/apache/any23/plugin/PluginIT.java  |  11 +-
 plugins/openie/pom.xml                          | 165 +++++++++++++++++++
 .../extractor/openie/OpenIEExtractor.java       | 137 +++++++++++++++
 .../openie/OpenIEExtractorFactory.java          |  52 ++++++
 .../org.apache.any23.extractor.ExtractorFactory |   1 +
 .../any23/openie/OpenIEExtractorTest.java       |  88 ++++++++++
 pom.xml                                         |   6 +-
 service/README.md                               |  49 ++++++
 service/README.txt                              |  50 ------
 service/pom.xml                                 |  84 +++++++++-
 .../java/org/apache/any23/servlet/Servlet.java  |  48 +++++-
 service/src/main/resources/form.html            |  59 ++++++-
 .../main/webapp/resources/js/bootstrap-modal.js |  22 ++-
 src/site/apt/any23-plugins.apt                  |   9 +-
 25 files changed, 734 insertions(+), 530 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/073190bd/plugins/integration-test/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/073190bd/plugins/openie/pom.xml
----------------------------------------------------------------------
diff --cc plugins/openie/pom.xml
index 0000000,0f34ad5..64c6806
mode 000000,100644..100644
--- a/plugins/openie/pom.xml
+++ b/plugins/openie/pom.xml
@@@ -1,0 -1,165 +1,165 @@@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+ 
+    http://www.apache.org/licenses/LICENSE-2.0
+ 
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+ -->
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+   <modelVersion>4.0.0</modelVersion>
+ 
+   <parent>
+     <groupId>org.apache.any23</groupId>
+     <artifactId>apache-any23</artifactId>
 -    <version>2.3-SNAPSHOT</version>
++    <version>2.2-SNAPSHOT</version>
+     <relativePath>../../pom.xml</relativePath>
+   </parent>
+ 
+   <groupId>org.apache.any23.plugins</groupId>
+   <artifactId>apache-any23-openie</artifactId>
+ 
+   <name>Apache Any23 :: Plugins :: OpenIE</name>
+   <description>Open Information Extraction module.</description>
+ 
+   <repositories>
+     <repository>
+       <snapshots>
+         <enabled>false</enabled>
+       </snapshots>
+       <id>bintray-allenai-maven</id>
+       <name>bintray</name>
+       <url>http://allenai.bintray.com/maven</url>
+     </repository>
+   </repositories>
+   <pluginRepositories>
+     <pluginRepository>
+       <snapshots>
+         <enabled>false</enabled>
+       </snapshots>
+       <id>bintray-allenai-maven</id>
+       <name>bintray-plugins</name>
+       <url>http://allenai.bintray.com/maven</url>
+     </pluginRepository>
+   </pluginRepositories>
+ 
+   <dependencies>
+     <dependency>
+       <groupId>org.apache.any23</groupId>
+       <artifactId>apache-any23-core</artifactId>
+       <version>${project.version}</version>
+       <scope>provided</scope>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.any23</groupId>
+       <artifactId>apache-any23-test-resources</artifactId>
+       <version>${project.version}</version>
+       <scope>test</scope>
+       <type>test-jar</type>
+     </dependency>
+     <dependency>
+       <groupId>org.allenai.openie</groupId>
+       <artifactId>openie_2.11</artifactId>
+       <version>${openie_2.11.version}</version>
+       <scope>compile</scope>
+     </dependency>
+     <dependency>
+       <groupId>org.allenai.openie</groupId>
+       <artifactId>openie_2.11</artifactId>
+       <version>${openie_2.11.version}</version>
+       <scope>compile</scope>
+       <type>pom</type>
+     </dependency>
+     <dependency>
+       <groupId>edu.washington.cs.knowitall</groupId>
+       <artifactId>openregex</artifactId>
+       <version>${openregex.version}</version>
+       <scope>runtime</scope>
+     </dependency>
+     <dependency>
+       <groupId>junit</groupId>
+       <artifactId>junit</artifactId>
+       <scope>test</scope>
+     </dependency>
+     <dependency>
+       <groupId>org.slf4j</groupId>
+       <artifactId>slf4j-log4j12</artifactId>
+       <scope>test</scope>
+     </dependency>
+   </dependencies>
+ 
+   <build>
+     <plugins>
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-surefire-plugin</artifactId>
+         <configuration>
+           <skipTests>true</skipTests>
+         </configuration>
+       </plugin>
+       <!-- Generates the distribution package -->
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-assembly-plugin</artifactId>
+         <configuration>
+           <appendAssemblyId>false</appendAssemblyId>
+           <descriptors>
+             <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
+           </descriptors>
+         </configuration>
+       </plugin>
+     </plugins>
+     <pluginManagement>
+       <plugins>
+         <plugin>
+           <groupId>org.apache.maven.plugins</groupId>
+           <artifactId>maven-assembly-plugin</artifactId>
+           <version>${maven-assembly-plugin.version}</version>
+           <executions>
+             <execution>
+               <id>assembly</id>
+               <phase>package</phase>
+               <goals>
+                 <goal>single</goal>
+               </goals>
+             </execution>
+           </executions>
+           <configuration>
+             <attach>true</attach>
+             <skipAssembly>true</skipAssembly>
+             <tarLongFileMode>gnu</tarLongFileMode>
+           </configuration>
+         </plugin>
+       </plugins>
+     </pluginManagement>
+   </build>
+ 
+   <profiles>
+     <profile>
+       <id>release</id>
+       <build>
+         <resources>
+           <resource>
+             <directory>${basedir}/../</directory>
+             <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+             <includes>
+               <include>LICENSE.txt</include>
+               <include>NOTICE.txt</include>
+             </includes>
+           </resource>
+         </resources>
+       </build>
+     </profile>
+ 
+   </profiles>
+ 
+ </project>

http://git-wip-us.apache.org/repos/asf/any23/blob/073190bd/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/073190bd/service/pom.xml
----------------------------------------------------------------------