You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2019/10/01 03:35:20 UTC

[any23-plugins] 01/01: ANY23-448 Move service and plugins out of core

This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/any23-plugins.git

commit 625e5e197ff8f5876334f36dad972e0b3013591e
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Mon Sep 30 20:35:08 2019 -0700

    ANY23-448 Move service and plugins out of core
---
 .gitignore                                         |    1 +
 CONTRIBUTING.md                                    |  129 +++
 LICENSE.md                                         |  271 +++++
 NOTICE.txt                                         |   27 +
 README.md                                          |   66 ++
 RELEASE-NOTES.txt                                  |  677 +++++++++++
 basic-crawler/.classpath                           |  135 +++
 basic-crawler/.project                             |   27 +
 .../.settings/org.eclipse.core.resources.prefs     |    6 +
 basic-crawler/.settings/org.eclipse.jdt.core.prefs |    6 +
 basic-crawler/.settings/org.eclipse.m2e.core.prefs |    4 +
 basic-crawler/pom.xml                              |  206 ++++
 .../src/main/assembly/LICENSE-with-deps.txt        |  251 +++++
 .../src/main/assembly/NOTICE-with-deps.txt         |   12 +
 basic-crawler/src/main/assembly/README.txt         |   77 ++
 basic-crawler/src/main/assembly/bin.xml            |   69 ++
 .../main/java/org/apache/any23/cli/Crawler.java    |  160 +++
 .../any23/plugin/crawler/CrawlerListener.java      |   36 +
 .../any23/plugin/crawler/DefaultWebCrawler.java    |   73 ++
 .../apache/any23/plugin/crawler/SharedData.java    |  121 ++
 .../apache/any23/plugin/crawler/SiteCrawler.java   |  270 +++++
 .../apache/any23/plugin/crawler/package-info.java  |   22 +
 .../META-INF/services/org.apache.any23.cli.Tool    |    1 +
 .../java/org/apache/any23/cli/CrawlerTest.java     |   97 ++
 .../any23/plugin/crawler/SiteCrawlerTest.java      |   83 ++
 basic-crawler/src/test/resources/log4j.properties  |   23 +
 html-scraper/.classpath                            |  200 ++++
 html-scraper/.project                              |   24 +
 .../.settings/org.eclipse.core.resources.prefs     |    6 +
 html-scraper/.settings/org.eclipse.jdt.core.prefs  |    6 +
 html-scraper/.settings/org.eclipse.m2e.core.prefs  |    4 +
 html-scraper/pom.xml                               |  124 +++
 .../src/main/assembly/LICENSE-with-deps.txt        |  212 ++++
 .../src/main/assembly/NOTICE-with-deps.txt         |    9 +
 html-scraper/src/main/assembly/README.txt          |   77 ++
 html-scraper/src/main/assembly/bin.xml             |   67 ++
 .../plugin/htmlscraper/HTMLScraperExtractor.java   |  140 +++
 .../htmlscraper/HTMLScraperExtractorFactory.java   |   42 +
 .../any23/plugin/htmlscraper/package-info.java     |   22 +
 .../org.apache.any23.extractor.ExtractorFactory    |    1 +
 .../htmlscraper/HTMLScraperExtractorTest.java      |   87 ++
 .../htmlscraper/html-scraper-extractor-test.html   |  493 +++++++++
 integration-test/pom.xml                           |  174 +++
 .../java/org/apache/any23/plugin/PluginIT.java     |  128 +++
 .../src/test/resources/log4j.properties            |   23 +
 office-scraper/.classpath                          |  199 ++++
 office-scraper/.project                            |   24 +
 .../.settings/org.eclipse.core.resources.prefs     |    6 +
 .../.settings/org.eclipse.jdt.core.prefs           |    6 +
 .../.settings/org.eclipse.m2e.core.prefs           |    4 +
 office-scraper/pom.xml                             |  114 ++
 .../src/main/assembly/LICENSE-with-deps.txt        |  212 ++++
 .../src/main/assembly/NOTICE-with-deps.txt         |    6 +
 office-scraper/src/main/assembly/README.txt        |   77 ++
 office-scraper/src/main/assembly/bin.xml           |   67 ++
 .../any23/plugin/officescraper/ExcelExtractor.java |  185 ++++
 .../officescraper/ExcelExtractorFactory.java       |   64 ++
 .../any23/plugin/officescraper/package-info.java   |   22 +
 .../main/java/org/apache/any23/vocab/Excel.java    |  143 +++
 .../org.apache.any23.extractor.ExtractorFactory    |    1 +
 .../plugin/officescraper/ExcelExtractorTest.java   |  130 +++
 .../plugin/officescraper/XSSFWorkbookTest.java     |   96 ++
 office-scraper/src/test/resources/log4j.properties |   34 +
 .../any23/plugin/officescraper/test1-workbook.xlsx |  Bin 0 -> 29878 bytes
 .../any23/plugin/officescraper/test2-workbook.xls  |  Bin 0 -> 25088 bytes
 openie/.classpath                                  |  239 ++++
 openie/.project                                    |   25 +
 openie/.settings/org.eclipse.core.resources.prefs  |    5 +
 openie/.settings/org.eclipse.jdt.core.prefs        |    6 +
 openie/.settings/org.eclipse.m2e.core.prefs        |    4 +
 openie/pom.xml                                     |  164 +++
 .../plugin/extractor/openie/OpenIEExtractor.java   |  149 +++
 .../extractor/openie/OpenIEExtractorFactory.java   |   52 +
 .../org.apache.any23.extractor.ExtractorFactory    |    1 +
 .../apache/any23/openie/OpenIEExtractorTest.java   |   88 ++
 pom.xml                                            | 1170 ++++++++++++++++++++
 76 files changed, 7982 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eb5a316
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+target
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..dc7a517
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,129 @@
+Contributing to Apache Any23 
+============================
+
+Summary
+-------
+This document covers how to contribute to the Any23 project. Any23 uses GitHub PRs to manage code contributions and tracks source code development through the [Any23 JIRA instance](https://issues.apache.org/jira/browse/ANY23).
+These instructions assume you have a GitHub.com account, so if you don't have one you will have to create one. Your proposed code changes will be published to your own fork of the Any23 project and you will submit a Pull Request for your changes to be added.
+
+_Let's get started!!!_
+
+Bug fixes
+---------
+
+It's very important that we can easily track bug fix commits, so their hashes should remain the same in all branches. 
+Therefore, a pull request (PR) that fixes a bug should be sent against a release branch.
+This can be either the "current release" or the "previous release", depending on which ones are maintained. 
+Since the goal is a stable master, bug fixes should be "merged forward" to the next branch in order: "previous release" -> "current release" -> master (in other words: old to new)
+
+Developing new features
+-----------------------
+
+Development should be done in a feature branch, branched off of master. 
+Send a PR (steps below) to get it into master (2x LGTM applies).
+A PR will only be merged when master is open; otherwise it will be held until master is open again.
+No back porting / cherry-picking features to existing branches!
+
+Fork the code 
+-------------
+
+In your browser, navigate to: [https://github.com/apache/any23](https://github.com/apache/any23)
+
+Fork whichever repository you wish to contribute to by clicking on the 'Fork' button on the top right hand side. The fork will happen and you will be taken to your own fork of the repository.  Copy the Git repository URL by clicking on the clipboard next to the URL on the right hand side of the page under '**HTTPS** clone URL'.  You will paste this URL when doing the following `git clone` command.
+
+On your computer, follow these steps to set up a local repository for working on Any23:
+
+``` bash
+$ git clone https://github.com/YOUR_ACCOUNT/any23.git
+$ cd any23
+$ git remote add upstream https://github.com/apache/any23.git
+$ git checkout master
+$ git fetch upstream
+$ git rebase upstream/master
+```
+
+Making changes
+--------------
+
+It is important that you create a new branch to make changes on and that you do not change the `master` branch (other than to rebase in changes from `upstream/master`).  In this example we will assume you will be making your changes to a branch called `ANY23-XXX`.  This `ANY23-XXX` is named after the issue you have created within the [Any23 JIRA instance](https://issues.apache.org/jira/browse/ANY23). Therefore `ANY23-XXX` will be created on your local repository and will be pushed to you [...]
+
+It is best practice to create a new branch each time you want to contribute to the project and only track the changes for that pull request in this branch.
+
+``` bash
+$ git checkout -b ANY23-XXX
+   (make your changes)
+$ git status
+$ git add .
+$ git commit -a -m "ANY23-XXX Descriptive title of ANY23-XXX"
+```
+
+> The `-b` specifies that you want to create a new branch called `ANY23-XXX`.  You only specify `-b` the first time you checkout because you are creating a new branch.  Once the `ANY23-XXX` branch exists, you can later switch to it with only `git checkout ANY23-XXX`.
+> Note that the commit message comprises the JIRA issue number and title... this makes explicit reference between Github and JIRA for improved project management.
+
+
+Rebase `ANY23-XXX` to include updates from `upstream/master`
+------------------------------------------------------------
+
+It is important that you maintain an up-to-date `master` branch in your local repository.  This is done by rebasing in the code changes from `upstream/master` (the official Any23 project repository) into your local repository.  You will want to do this before you start working on a feature as well as right before you submit your changes as a pull request.  We recommend you do this process periodically while you work to make sure you are working off the most recent project code.
+
+This process will do the following:
+
+1. Checkout your local `master` branch
+2. Synchronize your local `master` branch with the `upstream/master` so you have all the latest changes from the project
+3. Rebase the latest project code into your `ANY23-XXX` branch so it is up-to-date with the upstream code
+
+``` bash
+$ git checkout master
+$ git fetch upstream
+$ git rebase upstream/master
+$ git checkout ANY23-XXX
+$ git rebase master
+```
+
+> Now your `ANY23-XXX` branch is up-to-date with all the code in `upstream/master`.
+
+
+Make a GitHub Pull Request to contribute your changes
+-----------------------------------------------------
+
+When you are happy with your changes and you are ready to contribute them, you will create a Pull Request on GitHub to do so. This is done by pushing your local changes to your forked repository (default remote name is `origin`) and then initiating a pull request on GitHub.
+
+Please include the JIRA id, detailed information about the bug/feature, which tests were executed, how the reviewer can test this feature, etc. In case of UI PRs, a screenshot is preferred.
+
+> **IMPORTANT:** Make sure you have rebased your `ANY23-XXX` branch to include the latest code from `upstream/master` _before_ you do this.
+
+``` bash
+$ git push origin master
+$ git push origin ANY23-XXX
+```
+
+Now that the `ANY23-XXX` branch has been pushed to your GitHub repository, you can initiate the pull request.  
+
+To initiate the pull request, do the following:
+
+1. In your browser, navigate to your forked repository: [https://github.com/YOUR_ACCOUNT/any23](https://github.com/YOUR_ACCOUNT/any23).
+2. Click the new button called '**Compare & pull request**' that showed up just above the main area in your forked repository
+3. Validate the pull request will be into the upstream `master` and will be from your `ANY23-XXX` branch
+4. Enter a detailed description of the work you have done and then click '**Send pull request**'
+
+If you are requested to make modifications to your proposed changes, make the changes locally on your `ANY23-XXX` branch, re-push the `ANY23-XXX` branch to your fork.  The existing pull request should automatically pick up the change and update accordingly.
+
+
+Cleaning up after a successful pull request
+-------------------------------------------
+
+Once the `ANY23-XXX` branch has been committed into the `upstream/master` branch, your local `ANY23-XXX` branch and the `origin/ANY23-XXX` branch are no longer needed.  If you want to make additional changes, restart the process with a new branch.
+
+> **IMPORTANT:** Make sure that your changes are in `upstream/master` before you delete your `ANY23-XXX` and `origin/ANY23-XXX` branches!
+
+You can delete these deprecated branches with the following:
+
+``` bash
+$ git checkout master
+$ git branch -D ANY23-XXX
+$ git push origin :ANY23-XXX
+```
+
+Release Principles
+------------------
+See the [HowTo Release Apache Any23](http://any23.apache.org/release-howto.html) guide.
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..ac9bdec
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,271 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+   
+----------------------------------------------------------------------------------
+
+The following files contain code from the jQuery Foundation (http://jquery.com)
+
+./service/src/main/resources/form.html
+./service/src/main/webapp/resources/js/bootstrap-modal.js
+./service/src/main/webapp/resources/js/jquery-1.7.2.min.js
+./plugins/html-scraper/target/test-classes/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
+./test-resources/src/test/resources/application/xhtml/blank-file-header.xhtml
+./test-resources/src/test/resources/microformats/hcard/infinite-loop.html
+./test-resources/src/test/resources/microformats/hcard/linkedin-michelemostarda.html
+./test-resources/src/test/resources/html/rdfa/drupal-test-frontpage.html
+./test-resources/src/test/resources/html/rdfa/drupal-test-frontpage.html
+
+    Copyright 2013 jQuery Foundation and other contributors
+    http://jquery.com/
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+    
+----------------------------------------------------------------------------------------
+
+This software contains inclusions of source from Eclipse RDF4J which is licensed
+under the Eclipse Distribution License (a BSD-style license) 
+
+   Copyright (c) 2007, Eclipse Foundation, Inc. and its licensors.
+   Copyright Aduna (http://www.aduna-software.com/) 2001-2013
+
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+   * Redistributions of source code must retain the above copyright notice, this
+     list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright notice,
+     this list of conditions and the following disclaimer in the documentation
+     and/or other materials provided with the distribution.
+   * Neither the name of the Eclipse Foundation, Inc. nor the names of its
+     contributors may be used to endorse or promote products derived from this
+     software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+   ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+   ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/NOTICE.txt b/NOTICE.txt
new file mode 100644
index 0000000..ebabe12
--- /dev/null
+++ b/NOTICE.txt
@@ -0,0 +1,27 @@
+Apache Any23
+Copyright 2011-2019 The Apache Software Foundation
+Copyright 2008-2011 Digital Enterprise Research Institute (DERI)
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
+
+This product includes software developed by the jQuery 
+Foundation (http://jquery.org/) under an MIT license.
+
+This product includes software developed by Eclipse RDF4J
+(http://rdf4j.org/) under the Eclipse Distribution License v1.0.
+
+This product includes software developed by Andrey Somov
+(https://bitbucket.org/asomov/snakeyaml) under the Apache License
+v2.0
+
+This product includes software developed by The University of 
+Washington (UW), Professor Oren Etzioni, Professor Mausam,
+Michael Schmitz, (Developers) Open IE 4 Software
+(C) 2011-2012, University of Washington.  All rights reserved.
+US patent number 7,877,343 and 12/970,155 patent pending
+(https://github.com/allenai/openie-standalone) under the Open IE 4 
+Software License Agreement
+
+This product includes software developed by Hans Brende
+(https://github.com/HansBrende/f8) under an MIT license.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..26c4091
--- /dev/null
+++ b/README.md
@@ -0,0 +1,66 @@
+# Any23 Plugins
+
+This is the root dir of the Any23 Plugins module.
+
+A plugin is an extension of the Any23 core and can be plugged using
+the Plugin Manager capabilities.
+
+# Plugins
+
+## basic-crawler
+
+A CLI tool which extends the Rover CLI adding crawler specific
+capabilities.
+
+## html-scraper
+
+The HTML scraper is able to convert any HTML page to triples
+containing the text scraped from the page.
+
+## office-scraper
+
+The Office scraper is able to read the main MS Office compatible
+formats and convert them to triples.
+
+## integration-test
+
+This module contains the integration tests for all the defined plugins.
+
+# Generate Plugin Packaging
+
+To generate the desired plugin package, navigate to the plugin directory and execute 
+```
+mvn package
+```
+e.g. to generate the basic-crawler plugin package
+```
+$ cd $ANY23-HOME/plugins/basic-crawler
+$ mvn package
+```
+From the basic-crawler directory this generates
+```
+.
+|-- pom.xml
+|-- src
+|   |-- main
+|   |   |-- assembly
+|   |   `-- java
+|   `-- test
+`-- target
+    |-- any23-basic-crawler-${version}.jar
+    |-- apache-any23-basic-crawler-${version}-bin.tar.gz <<<
+    |-- apache-any23-basic-crawler-${version}-bin.zip <<<
+    |-- archive-tmp
+    |-- classes
+    |   |-- META-INF
+    |   `-- org
+    |-- generated-sources
+    |-- maven-archiver
+    |-- maven-shared-archive-resources
+    |-- surefire
+    |-- surefire-reports
+    `-- test-classes
+...
+```
+Plugin-specific READMEs can be found in either ./target/*.tar.gz or ./target/*.zip (annotated above with '<<<'), where much more detailed information can be found.
+  
diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt
new file mode 100644
index 0000000..44c4c4a
--- /dev/null
+++ b/RELEASE-NOTES.txt
@@ -0,0 +1,677 @@
+             Apache Any23 2.3
+              Release Notes
+           10/02/2019 (dd/mm/yyyy)
+
+Sub-task
+
+    [ANY23-184] - Update Javadoc in o.a.a.extractor.microdata.*
+    [ANY23-356] - Update dependencies
+    [ANY23-357] - Resolve mockito deprecation warnings
+    [ANY23-358] - Resolve junit.framework deprecation warnings & RDFa11Parser deprecation warnings
+    [ANY23-359] - Resolve org.apache.commons.io.IOUtils deprecation warning
+    [ANY23-360] - Resolve Xerces deprecation warnings
+    [ANY23-361] - Resolve Tika deprecation warning
+    [ANY23-362] - Resolve rdf4j deprecation warnings
+    [ANY23-363] - Update httpclient/httpcore to version 4.5.6/4.4.10
+    [ANY23-364] - Resolve POI deprecation warnings
+    [ANY23-365] - Resolve additional warnings
+    [ANY23-366] - Resolve additional warnings in build
+    [ANY23-369] - Resolve overlapping classes
+    [ANY23-388] - It should be possible to configure the NTriplesWriter to use unicode points
+    [ANY23-404] - Make MicrodataExtractor compliant with default registry
+    [ANY23-405] - Parse microdata property values correctly
+    [ANY23-407] - Allow microdata itemids to be created from relative URLs
+    [ANY23-408] - Use document IRI as default namespace in microdata strict mode
+    [ANY23-409] - Allow multiple microdata itemtype values
+    [ANY23-410] - Fix microdata itemrefs
+
+Bug
+
+    [ANY23-13] - Verify why the maven-changelog-plugin doesn't work properly
+    [ANY23-16] - Property URI generation for Microdata/schema.org
+    [ANY23-17] - problem detecting media type for turtle content with comment at the top
+    [ANY23-55] - any23 is not following the redirection
+    [ANY23-67] - Microdata extraction using obsolete RDF conversion scheme
+    [ANY23-154] - Not able to extract microdata in few test cases
+    [ANY23-167] - Microdata itemscope properties incorrectly attached
+    [ANY23-169] - Incorrect interpretation of relative and absolute paths with Microdata
+    [ANY23-188] - NPE when ICBMExtractor#getDescription()#getExtractorLabel() called
+    [ANY23-237] - Fix RDFa test 0087: stylesheet reserved word is stripped out
+    [ANY23-245] - Infinite loop on some malformed markup
+    [ANY23-322] - Any23 embedded service is broken
+    [ANY23-329] - master branch broken with pom.xml any23 version
+    [ANY23-331] - Tool service implementations declared in wrong module?
+    [ANY23-334] - SingleDocumentExtraction.createExtractionContext() uses UUID as defaultLanguage
+    [ANY23-336] - Parsing json-ld content takes prohibitively long time
+    [ANY23-337] - BenchmarkTripleHandler does not report accurate extraction interval times
+    [ANY23-338] - Json-ld comment parsing fails in rare cases
+    [ANY23-339] - Microdata extractor can sometime merge two different itemscopes into one
+    [ANY23-340] - Any23 extraction does not pass Nutch plugin test
+    [ANY23-344] - MicrodataExtractor not resolving urls correctly
+    [ANY23-345] - MicrodataExtractorTest has a duplicated test
+    [ANY23-346] - rdf4j versions 2.3.0, 2.3.1 contain a regression: we need to switch back to version 2.2.4
+    [ANY23-347] - RDFParseException: the prefix "pw" is not bound
+    [ANY23-348] - IllegalArgumentException in MicrodataExtractor
+    [ANY23-349] - MicrodataExtractor errors for links that are telephone numbers
+    [ANY23-350] - RDFParseException: "icon" must be followed by ' = ' character
+    [ANY23-351] - NullPointerException in HCardExtractor
+    [ANY23-353] - RDFParseException: datatype rdf:langString requires a language tag
+    [ANY23-367] - latest.stable.released property is never used and out of date
+    [ANY23-368] - Jenkins builds are failing after running out of disk space
+    [ANY23-372] - LGPL-licensed transitive dependency
+    [ANY23-373] - Web page /install.html: software version variable was not decoded.
+    [ANY23-376] - IllegalArgumentException: invalid property name ''
+    [ANY23-377] - Microdata extractor replaces empty strings with "Null"
+    [ANY23-378] - JsonParseException caused by trailing commas in JSON-LD
+    [ANY23-379] - RDFa SAXParseException: invalid XML character
+    [ANY23-380] - RDFa SAXParseException: attribute was already specified
+    [ANY23-381] - JsonParseException: Illegal unquoted character
+    [ANY23-382] - Distinguish between fatal and recoverable json-ld parsing errors
+    [ANY23-383] - JsonParseException: Unexpected character 0x2028
+    [ANY23-386] - Item's properties are in the wrong item since the 2.2
+    [ANY23-387] - Possible OutOfMemoryError with bad deeply nested HTML
+    [ANY23-389] - RDFa extraction breaks when base element uses relative href
+    [ANY23-391] - ICAL vocab uses class "vcalendar" instead of "Vcalendar"
+    [ANY23-392] - Launching maven-jetty-plugin: Problem accessing /apache-any23-service/resources/form.html
+    [ANY23-395] - any23.org 500 Internal Server Error
+    [ANY23-406] - Cannot suppress Tika warnings
+    [ANY23-411] - Use Content-Type to help determine encoding
+    [ANY23-415] - NTriplesExtractor tries all text/plain files, causing numerous fatal issues
+    [ANY23-416] - NTriplesExtractor does not recognize "application/n-triples" mimetype
+    [ANY23-420] - Handle Json+ld extraction failure
+    [ANY23-425] - iCal, jCal, xCal extractors aren't listed in META-INF/services
+
+New Feature
+
+    [ANY23-81] - Interactive web service
+
+Improvement
+
+    [ANY23-38] - Use a single logging tool: slf4j
+    [ANY23-190] - any23.org homepage busted on IE11
+    [ANY23-212] - Improve naming convention for service output files
+    [ANY23-215] - Forward slashes in URL's should not be escaped in RDF output
+    [ANY23-231] - Make JSON Reporting output pretty print
+    [ANY23-240] - Option to process html tags as spaces in Microdata
+    [ANY23-323] - Update Eclipse RDF4J version to 2.3
+    [ANY23-332] - Plugin-specific properties shouldn't be declared in default-configuration.properties
+    [ANY23-341] - Remove dependency on defunct commons-httpclient 3.1
+    [ANY23-343] - Upgrade to jsonld-java v 0.12.0
+    [ANY23-352] - Update to rdf4j version 2.3.2
+    [ANY23-354] - Clean up dependencies
+    [ANY23-355] - Deprecate RDFa11Parser since Rio implementations are used instead
+    [ANY23-374] - Invalid nested item takes out everything
+    [ANY23-385] - Improve charset detection for (x)html documents
+    [ANY23-390] - Implement ICal, JCal, XCal extractors
+    [ANY23-393] - Any23 master to build under JDK 10.X
+    [ANY23-394] - JSON-LD Extractions Flag Errors in Google's Structured Data Tooling
+    [ANY23-396] - Overhaul WriterFactory API
+    [ANY23-399] - Upgrade Apache parent POM to version 21
+    [ANY23-401] - Upgrade to Tika 1.19.1
+    [ANY23-402] - Deprecate JSONWriter, JSONWriterFactory
+    [ANY23-403] - Upgrade to RDF4J 2.4.0
+    [ANY23-414] - Support reverse itemprops in microdata
+    [ANY23-418] - Take another look at encoding detection
+    [ANY23-419] - Add J2EE dependencies such that service runs under JDK11
+    [ANY23-424] - Update dependencies
+
+Test
+
+    [ANY23-422] - Error message when any23 cli tool used
+
+Task
+
+    [ANY23-333] - Augment use of Any23PluginManager in How to Register a Plugin documentation
+    [ANY23-423] - Update POM for the move to gitbox.
+
+             Apache Any23 2.2
+              Release Notes
+              25/01/2018 (dd/mm/yyyy)
+
+Sub-task
+
+    [ANY23-155] - Test failure: testRunOnHTTPResource(org.apache.any23.cli.MicrodataParserTest)
+    [ANY23-267] - Entire extractions fail due to "The element type 'meta' must be terminated by the matching end-tag </meta>"
+    [ANY23-268] - Entire extraction task fails due to "Element type "t.length" must be followed by either attribute specifications, ">" or "/>"
+
+Bug
+
+    [ANY23-12] - character are wrongly encoded in rdfxml output
+    [ANY23-131] - Nested Microdata are not extracted
+    [ANY23-140] - Revise Any23 tests to remove fetching of web content
+    [ANY23-166] - Parsing crashes with attributes that don't use quotes
+    [ANY23-201] - Service Regularly Times Out on DBPedia Queries
+    [ANY23-227] - not extracting opengraph rdfa
+    [ANY23-228] - Invalid URI
+    [ANY23-230] - any23.org redirects to single slash URI
+    [ANY23-256] - MicrodataParserTest failing locally but not on Jenkins
+    [ANY23-260] - Get Any23 listed as an Application capable of using DBPedia
+    [ANY23-266] - Fix Issues with Failing WebService Examples
+    [ANY23-271] - Address "...The entity "raquo" was referenced, but not declared" SAXParseException
+    [ANY23-273] - The content of elements must consist of well-formed character data or markup - no bogus comments
+    [ANY23-303] - JsonLdError: loading remote context failed: http://schema.org/
+    [ANY23-306] - Absent binaries for version 2.0
+    [ANY23-312] - Triple sub-pred-null should not be added into outcome. Change traversing method.
+    [ANY23-314] - Service fails to return extraction in case of extraction error
+    [ANY23-316] - Yaml parser does not handle intentional null value
+    [ANY23-317] - Any23 fails when dealing with JavaScript
+    [ANY23-318] - ExtractionException handling in BaseRDFExtractor.java kills entire extraction
+    [ANY23-326] - parsing unclosed meta and input tags fails
+
+New Feature
+
+    [ANY23-8] - Write a separate tool for RDFa/microformat detection tool usable in crawlers
+    [ANY23-233] - Add local extraction cache to Any23 service
+
+Improvement
+
+    [ANY23-106] - Gracefully shut down Any23 service
+    [ANY23-213] - Implement JSON reporting for the Any23 service
+    [ANY23-214] - ë (e-umlaut or diaeresis) not decoded in RDF output
+    [ANY23-249] - Update all W3C and other Standards Compliance within Any23
+    [ANY23-280] - Refactor ContentExtractor to improve extraction flexibility
+    [ANY23-291] - JSON-LD should be looked up in entire HTML document, not just in <head>
+    [ANY23-298] - Revisit the OGP.java vocabulary and update it
+    [ANY23-309] - "Scraper" misspelled as "Scarper" on Downloads webpage
+    [ANY23-319] - Upgrade jsonld-java dependency to 0.11.1
+    [ANY23-324] - Replace net.sourceforge.nekohtml with jsoup
+    [ANY23-325] - Any23 incompatible with http://rdfa.info/test-suite/#
+
+Test
+
+    [ANY23-320] - Address @Ignore tests in Any23
+
+Wish
+
+    [ANY23-210] - Address 1.0 Release Review Discrepancies
+
+Task
+
+    [ANY23-40] - Complete Documentation for Plugin Management system
+
+
+			 Apache Any23 2.1
+			  Release Notes
+		      14/09/2017 (dd/mm/yyyy)
+
+Bug
+
+    [ANY23-244] - Broken Links on Web-Site
+    [ANY23-282] - Replacement for all Sindice namespaces and URI's
+    [ANY23-304] - Add extractor for OpenIE
+    [ANY23-305] - Missing appender in command line tool
+    [ANY23-308] - Adding option "-d" to yaml file parsing gives error
+    [ANY23-310] - Rover displays wrong statistical values
+
+Improvement
+
+    [ANY23-206] - Overhaul Any23 site documentation
+    [ANY23-301] - Forward all logs into STDERR stream
+
+New Feature
+
+    [ANY23-257] - Support OWL as an input format
+
+Task
+
+    [ANY23-283] - access to analysis.apache.org
+
+			 Apache Any23 2.0
+			  Release Notes
+		      03/02/2017 (dd/mm/yyyy)
+Sub-task
+
+    [ANY23-243] - Overhaul and update README.txt
+
+Bug
+
+    [ANY23-79] - No execute permissions in command line tool
+    [ANY23-92] - NQuadsParser does not require whitespace between elements
+    [ANY23-99] - NQuadsWriter should force ASCII in OutputStream constructor
+    [ANY23-153] - Automatically Generate EARL reports for Any23 RDF Parsers
+    [ANY23-176] - DOC: Apache Any23 Installation Guide
+    [ANY23-200] - Build revision is not correctly defined
+    [ANY23-219] - rover does not work with -f nquads option
+    [ANY23-235] - NQuads links broken on Supported Formats Page
+    [ANY23-236] - Port Any23 site to Apache CMS
+    [ANY23-248] - NTriplesWriter on hadoop : issue with MIME type/Upgrade sesame dependencies to 2.7.14
+    [ANY23-252] - JSON-LD format MIME type is not detected
+    [ANY23-253] - JSON-LD cannot be processed by Rover
+    [ANY23-255] - apache-any23-quads dependency should not be <scope> test in core pom.xml
+    [ANY23-265] - ThreadSafety issue in ItemPropValue
+    [ANY23-272] - Service fails to start with any23server.bat
+    [ANY23-277] - Any23 master branch will not build due to lacking maven-assembly-plugin
+    [ANY23-279] - Fix EmbeddedJSONLDExtractor ExtractorDescription getDescription() implementation
+    [ANY23-296] - Tar complains about groupid value being too big
+    [ANY23-302] - rover JSON output is not valid
+
+Improvement
+
+    [ANY23-80] - Split out command line tools into a separate module
+    [ANY23-163] - VocabPrinter tool broken with No writer factory available for RDF format N-Quads (mimeTypes=text/x-nquads; ext=nq)
+    [ANY23-185] - Add missing <meta> element attributes to HTMLMetaExtractor
+    [ANY23-207] - Implement Microformats2
+    [ANY23-246] - Add Open Graph Protocol and Facebook prefixes to popular.prefixes
+    [ANY23-247] - FIX Attribute name "itemscope" associated with an element type "html" must be followed by the ' = ' character.
+    [ANY23-250] - Upgrade to Tika 1.7
+    [ANY23-261] - Tiny typo in Data Extraction documentation source example
+    [ANY23-263] - Upgrade to Tika 1.14
+    [ANY23-274] - Change any23.microdata.ns.default configuration value to http://schema.org
+    [ANY23-276] - Upgrade sesame dependencies to RDF4J
+    [ANY23-278] - Upgrade all Maven plugin versions in parent pom.xml
+    [ANY23-293] - Package log4j configuration with core appassembler
+    [ANY23-297] - Any23 doesn't build under JDK1.8
+    [ANY23-299] - Missing YAML to RDF parser
+    [ANY23-300] - Ignore NetBeans configuration files
+
+Task
+
+    [ANY23-141] - Upgrade OpenRDF Sesame to 2.7.0
+    [ANY23-242] - Address issues with 1.1 #1 RC
+
+Wish
+
+    [ANY23-19] - Abstract away any specific RDF APIs
+    [ANY23-226] - Extract JSON-LD embedded in HTML
+
+                         Apache Any23 1.1
+                          Release Notes
+                      15/10/2014 (dd/mm/yyyy)
+Bug
+
+    [ANY23-205] - Remove xrefs from Any23 site and replace with Git(hub) links
+    [ANY23-220] - Run crawler plugin on Apache Any23 site
+    [ANY23-234] - No writer factory available for RDF format N-Quads (mimeTypes=text/x-nquads; ext=nq)
+
+Improvement
+
+    [ANY23-157] - Update Any23 site to accommodate move to Git.
+    [ANY23-197] - Extract embedded json-ld from html documents
+    [ANY23-204] - fix url encoding problem : PR#3
+    [ANY23-209] - Bug in site generation
+    [ANY23-221] - Enable JSON-LD as an input format for the WebService at any23.org
+    [ANY23-238] - Fix generation of BNode name for microdata when 'itemid' is given without a value.
+
+New Feature
+
+    [ANY23-7] - Performance test suite
+    [ANY23-160] - [SECURITY] Frame injection vulnerability in published Javadoc
+
+Task
+
+    [ANY23-222] - Push 1.1-SNAPSHOT artifacts to the Any23 website
+                           
+
+                           Apache Any23 1.0
+                             Release Notes
+                         09/05/2014 (dd/mm/yyyy)
+
+Sub-task
+
+    [ANY23-148] - Programmes Ontology
+
+Bug
+
+    [ANY23-100] - Issue with RDFa extractor while processing nested properties
+    [ANY23-135] - Any23 RDFa Extractor ignores multiple prefix and property statements
+    [ANY23-136] - Some RDFa tests have incorrect expected results
+    [ANY23-168] - RDFa properties in <meta> elements not picked up
+    [ANY23-170] - Dependency error org.apache.commons:commons-csv:1.0-SNAPSHOT-rev1148315
+    [ANY23-172] - Fix minor issues with Any23 0.9.0 RC
+    [ANY23-173] - Please delete old releases from mirroring system
+    [ANY23-174] - Incorrect RDFa extractions
+    [ANY23-203] - Update version revisions from 0.9.1 to 1.0
+
+Improvement
+
+    [ANY23-65] - Update to RDFa extraction stylesheet
+    [ANY23-128] - html-rdfa11 extractor fails on mailto: anchors
+    [ANY23-130] - Improve aesthetics of the output format when straying from default java.io.PrintStream
+    [ANY23-137] - RDFa parser implementation proposal
+    [ANY23-179] - Improve Javadoc and throwing of IllegalArgumentException in Any23#createDocumentSource
+    [ANY23-180] - Create an Apache hosted jail running an Any23 service instance
+    [ANY23-181] - Upgrade NekoHTML to 1.9.20
+
+New Feature
+
+    [ANY23-134] - Create o.a.a.extractor.tika Parser and Extractor implementations
+    [ANY23-177] - Add support for JSON-LD
+
+Task
+
+    [ANY23-162] - Add package.java for all LKIFCore classes
+
+                           Apache Any23 0.9.0
+                             Release Notes
+                         28/10/2013 (dd/mm/yyyy)
+
+Sub-task
+
+    [ANY23-142] - LKIF-Core Vocabulary
+    [ANY23-143] - LRICore Vocabulary
+
+Bug
+
+    [ANY23-111] - Any23 raises an unmanaged exception from the Microdata parser
+    [ANY23-115] - Empty spans seem to break ANY23
+    [ANY23-161] - Fix service file generation
+    [ANY23-165] - "Invalid content" error if TITLE precedes encoding declaration in the document
+    [ANY23-171] - form.html not in correct location in service.
+
+Improvement
+
+    [ANY23-47] - Migrate basic-crawler classes to org.apache.nutch
+    [ANY23-164] - office-scraper ExcelExtractorFactory.java to accept application/x-tika-ooxml and application/x-tika-msoffice formats
+
+New Feature
+
+    [ANY23-120] - Split CLI tools out into a new module
+
+Task
+
+    [ANY23-122] - Cleanup Distribution Mirrors
+
+                           Apache Any23 0.8.0
+                             Release Notes
+                         01/05/2013 (dd/mm/yyyy)
+                         
+Sub-task
+
+    [ANY23-109] - Missing tika-config.xml in o.a.a.mime
+    [ANY23-110] - DOAP Vocabulary
+
+Bug
+
+    [ANY23-44] - error when parsing a document from http://www.afdsi.org/docs/test/html/RDFa/_food-stream_.htm
+    [ANY23-78] - Download page links are broken
+    [ANY23-108] - Broken schema.org microdata extraction
+    [ANY23-112] - Fix incubation disclaimer
+    [ANY23-113] - Remove dependencies from parent pom.xml file
+    [ANY23-116] - Empty values are skipped when reading tab separated CSV.
+    [ANY23-156] - Add logging dependencies to plugins and service
+
+Improvement
+
+    [ANY23-2] - Add support for hreview-aggregate microformat.
+    [ANY23-26] - Upgrade dependency to Apache Tika 1.2
+    [ANY23-46] - Update Any23 web service
+    [ANY23-83] - Remove hardcoded formats throughout Any23 to make it useful as a library
+    [ANY23-101] - Use RDFFormat.NQUADS in nquads module
+    [ANY23-139] - Simplify site deploy plugging the maven-scm-publish-plugin
+    [ANY23-144] - Implement comprehensive naming of o.a.a.api.vocab classes
+
+New Feature
+
+    [ANY23-4] - Integrate W3C's RDFa test suite and pass all tests
+    [ANY23-85] - Split NQuads out into its own module
+    [ANY23-96] - Add user agent string to basic-crawler
+    [ANY23-117] - Split Mime type detection out into its own module
+    [ANY23-118] - Split Encoding detection out into its own module
+
+Task
+
+    [ANY23-41] - Write basic-crawler plugin documentation
+    [ANY23-125] - Drop the Incubating DISCLAIMER
+                         
+
+                             Apache Any23 0.7.0-incubating
+                              Release Notes
+                              25/06/2012
+
+Sub-task
+
+    [ANY23-25] - Update all Maven POM's in trunk
+    [ANY23-31] - Move any23 site documentation out of trunk and into its own SVN directory
+    [ANY23-53] - Bad Web Service documentation
+
+Bug
+
+    [ANY23-14] - Add support for Extractor sub results
+    [ANY23-20] - The Any23 PluginManager fails handling resource paths containing spaces.
+    [ANY23-34] - Plugin Integration Test Fails
+    [ANY23-37] - LGPL'ed components cannot be included in distribution packages
+    [ANY23-42] - Fix issue in RDFa11Parser.java is not resolving relative URIs correctly
+    [ANY23-49] - N3/NQ parsers ignoring stopAtFirstError flag
+    [ANY23-58] - HCardExtractor infinite loop and memory exhaustion
+    [ANY23-62] - ExtractionResultImpl loses all issues generated by sub extractions
+    [ANY23-73] - The ToolRunner CLI driver -p (--plugins-dir) option doesn't work because parsed after the Tool list loading
+    [ANY23-77] - Facing a infinite loop problem in version 0.6.1 - Verify
+    [ANY23-78] - Download page links are broken
+    [ANY23-87] - Bogus argument in o.a.a.cli.CrawlerTest
+    [ANY23-88] - any23 script -v or --version option doesn't display actual version
+    [ANY23-94] - The Microdata CLI tool doesn't work anymore
+    [ANY23-95] - Activate the IgnoreAccidentalRDFa filter for the Any23 Service instance
+    [ANY23-97] - The test suite was not running all tests, minor regressions occurred
+
+Improvement
+
+    [ANY23-18] - Add a new extractor for RDFa using java-rdfa
+    [ANY23-28] - Document munging of Any23 history to CHANGES.txt
+    [ANY23-32] - replace hardcoded bash script with generated via appassembler
+    [ANY23-33] - Replace proprietary SUN imports from Any23 classes.
+    [ANY23-45] - Improve issue verification support in Extractor tests
+    [ANY23-50] - Simplify plugin loading avoiding the classpath scanning
+    [ANY23-56] - Change repo-ext to Any23 SVN mirror repo.
+    [ANY23-63] - The Any23 web service doesn't return the Issue Report generated by activated Extractors, hiding major metadata issues
+    [ANY23-64] - Improve CLI usage aesthetics
+    [ANY23-70] - Establish searchable list archives
+    [ANY23-71] - improve the current CLI engine
+    [ANY23-74] - Disable domain triple generation in default configuration
+    [ANY23-75] - Improve runtime of the Microdata extractor on documents with many relations.
+    [ANY23-76] - Improve runtime of the Microformat extractor on documents with many relations.
+    [ANY23-82] - Don't use explicit reference to Log4j classes
+    [ANY23-86] - Better logging in SiteCrawlerTest
+
+New Feature
+
+    [ANY23-9] - Prepare a dedicated homepage for Any23
+    [ANY23-29] - Migrate code base to ASF infrastructure
+    [ANY23-57] - Create Any23 History documentation and add to site
+    [ANY23-59] - Create KEYS file for Any23
+    [ANY23-68] - Create Powered By documentation/page
+    [ANY23-102] - Any23 DOAP file
+
+Task
+
+    [ANY23-21] - Migrate all packages and classes to ORG.APACHE.ANY23
+    [ANY23-27] - Import revisions r1547 to r1607 from Google Code SVN to ASF SVN
+    [ANY23-36] - Merge GCode specific CHANGES.txt report in main changes.xml
+    [ANY23-39] - Write Down Overall Architecture Document to help new developers maintaining the Any23 core
+    [ANY23-48] - Update Documentation (Site + READMEs) to reflect changes in shell script usage
+    [ANY23-52] - Remove non ASF logos from Any23 Service page
+    [ANY23-66] - Fix Javadoc
+
+==========================================================================
+
+                             Apache Any23 0.6.1
+                              Release Notes
+
+Fixes
+
+ * Improved MIMEType detection for CSV input. [172, 176]
+
+==========================================================================
+
+                             Apache Any23 0.6.0
+                              Release Notes
+
+Fixes
+
+ * Fixed several bugs. [151, 153, 154, 155, 156, 164, 168]
+ * Removed unused Apache Any23 dependencies. [162]
+ * Introduced parent POM dependencyManagement. [163]
+ * Minor code refactoring. [142]
+ * Updated project documentation. [161]
+
+Enhancements
+
+ * Added support for Microdata [114, 141, 144, 145, 152, 157]
+ * Added RDFa 1.1 support for new prefix specification. [143]
+ * Added CSV Extractor (RDFizer). [150, 165]
+ * Added HTML/META Extractor. [148, 149]
+ * Improved Configuration programmatic management. [147]
+ * Added several flags to control metadata triples generation. [146]
+ * Improved nesting relationship explicitation in Microformat extractors. [80]
+ * Major Extractor interface refactoring. [160, 167]
+ * Improved TagSoup Extractor based error reporting. [159]
+ * Added command-line tool to print out the Apache Any23 declared vocabularies. [114]
+
+==========================================================================
+
+                              Apache Any23 0.6.0-M2
+                                Release Notes
+
+The release 0.6.0-M2 introduces major fixes on M1 milestone
+[154, 155, 156] and improves Configuration [147] and Microdata
+ error management[157].
+
+==========================================================================
+
+                             Apache Any23 0.6.0-M1
+                               Release Notes
+
+The release 0.6.0-M1 is an early preview of the
+Microdata support. [114]
+
+==========================================================================
+
+                             Apache Any23 0.5.0
+                              Release Notes
+
+Fixes
+
+ * Fixed wrong conversion of a generic XML file to RDF. [131]
+ * Fixed usage of 'base' tag when resolving relative URIs
+   in RDFa. [75]
+ * Fixed error parsing Turtle data. [87]
+ * Fixed issue with escaping in NQuads parser. [126]
+ * Fixed XML DTD validation attempt. [95]
+ * Fixed concurrent modification exception in
+   ExtractionContentBlocker filter. [86]
+ * Fixed mime type detection of direct input when source
+   contains blank chars. [83, 90]
+ * Fixed reporting when producing no triples. [79]
+ * Fixed any23-service packaging, added profile for excluding
+   embedded dependencies. [113]
+
+Enhancements
+
+ * Improved extraction report: added list of 
+   activated extractors. [89]
+ * Improved extraction of HTML link element. [133]
+ * Added XPath HTML extractor. [124]
+ * Added HRecipe Microformat extractor. [103]
+ * Added plugin support for Apache Any23. [111]
+ * Implemented HTML Scraper Plugin. [123]
+ * Upgraded to Sesame 2.4.0. [136]
+ * Upgraded to Jetty 8.0.0 [138]
+ * Upgraded maven-site-plugin. [85]
+ * Added flags to exclude metadata triples [134]
+ * Added removal of CSS related triples. [135]
+ * Improved overall documentation. [130]
+ * Overall POM refactoring. [125]
+
+==========================================================================
+
+                             Apache Any23 0.4.0 
+                              Release Notes
+
+* The any23-service module has been separated from the any23-core module,
+  the Ant build system has been dropped. [Issue 44]
+* Added support for HTML metadata (RDFa / Microformats) validation
+  and correction (validator). [Issue 77]
+* Added flag to disable the nesting relationship property 
+  enrichment. [Issue 67]
+* Improved coverage of Microformats tests. [Issue 65]
+* Improved documentation. [Issue 44]
+* Various code consolidation. [Issues 68, 69, 70, 71, 72, 73, 74, 77]
+
+==========================================================================
+
+	                         Apache Any23 0.3.0 
+                              Release Notes
+
+* Added detection and enrichment of nested microformats. [Issue #61]
+* Added detection and support of N-Quads as input and output format. [Issue #7]
+* General Improvements in RDFa extraction. [Issue #12, Issue #14]
+* Added support of Turtle embedded in HTML script tag. [Issue #62]
+* Improvement in encoding support. [Issue #43]
+* Improvement in Core API. [Issue #27]
+* Improved support for Species Microformat. [Issue #63]
+* General Code prettification.
+
+==========================================================================
+
+	                         Apache Any23 0.2.2 
+                              Release Notes
+
+* Fixed dependency management on Maven. A second level dependency of Xerces
+  introduced a conflict on the java.xml.transform API causing wrong XSLT 
+  transformations within RDFa extractor.
+
+==========================================================================
+
+	                         Apache Any23 0.2.1 
+                              Release Notes
+
+* Major fix on Tika configuration management. This fix solves the
+  auto detection of the main Semantic Web related formats.
+
+==========================================================================
+
+                            Apache Any23 0.2
+                             Release Notes
+
+============
+Introduction
+============
+
+This release features a redesigned API and incorporates enhancements and
+bug fixes that have accumulated since the 0.1 release.
+Apart  from  some  new  or changed dependencies on the underlying libraries,
+this  version  comes  with an improved unit test coverage and other features
+like the automatic charset encoding detection and an improved documentation.
+Maven build system has been introduced.
+
+
+==================================
+Summary of major changes since 0.1
+==================================
+
+* Redesigned Java API
+    - Input from string, stream, file, or URI
+    - Allow choosing which extractors to use
+    - Report origin of triples (document/extractor) to client processors
+    - Various processors/serializers for extracted triples
+* Added flexible command-line tool for easy testing
+* Vastly improved website and documentation
+* Media type and encoding detection via Apache Tika
+* Switched RDF library from Jena to Sesame
+* Added Maven build
+* Better RDF extraction from Microformats
+* Extractors now come with an example file to document typical in- and output
+* Major refactoring
+* Lots and lots of bugfixes
+
+=================
+Supported formats
+=================
+
+* RDF/XML
+* Notation3 and Turtle
+* N-Triples
+* RDFa
+
+Various microformats, see http://sindice.com/developers/microformat on Sindice Microformats support.
+
+===================
+Dependency Upgrade
+===================
+
+CyberNeko Html parser has been upgraded to 1.9.14.
+
+Apache Tika 0.3 has been replaced with 0.6, with the
+new  support  for  the automatic encoding detection.
+
+EOF
+
diff --git a/basic-crawler/.classpath b/basic-crawler/.classpath
new file mode 100755
index 0000000..dcf5234
--- /dev/null
+++ b/basic-crawler/.classpath
@@ -0,0 +1,135 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+  <classpathentry kind="src" path="src/test/java" output="target/test-classes" including="**/*.java"/>
+  <classpathentry kind="src" path="src/test/resources" output="target/test-classes" excluding="**/*.java"/>
+  <classpathentry kind="src" path="src/main/java" including="**/*.java"/>
+  <classpathentry kind="src" path="src/main/resources" excluding="**/*.java"/>
+  <classpathentry kind="src" path="target/maven-shared-archive-resources" excluding="**/*.java"/>
+  <classpathentry kind="output" path="target/classes"/>
+  <classpathentry kind="var" path="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0.jar" sourcepath="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2.jar" sourcepath="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/inject/javax.inject/1/javax.inject-1.jar" sourcepath="M2_REPO/javax/inject/javax.inject/1/javax.inject-1-sources.jar"/>
+  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-core"/>
+  <classpathentry kind="src" path="/apache-any23-api"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-csvutils"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6.jar" sourcepath="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-mime"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6.jar" sourcepath="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2.jar" sourcepath="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1.jar" sourcepath="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16.jar" sourcepath="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1.jar" sourcepath="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0.jar" sourcepath="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar" sourcepath="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar" sourcepath="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18.jar" sourcepath="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11.jar" sourcepath="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2.jar" sourcepath="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar" sourcepath="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar" sourcepath="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06.jar" sourcepath="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1.jar" sourcepath="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6.jar" sourcepath="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11.jar" sourcepath="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0.jar" sourcepath="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72.jar" sourcepath="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/guava/22.0/guava-22.0.jar" sourcepath="M2_REPO/com/google/guava/guava/22.0/guava-22.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/findbugs/jsr305/3.0.2/jsr305-3.0.2.jar" sourcepath="M2_REPO/com/google/code/findbugs/jsr305/3.0.2/jsr305-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/errorprone/error_prone_annotations/2.1.3/error_prone_annotations-2.1.3.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1.jar" sourcepath="M2_REPO/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14.jar" sourcepath="M2_REPO/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1.jar" sourcepath="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar" sourcepath="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4.jar" sourcepath="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15.jar" sourcepath="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2.jar" sourcepath="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0.jar" sourcepath="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-encoding"/>
+  <classpathentry kind="var" path="M2_REPO/org/rypt/f8/1.1/f8-1.1.jar" sourcepath="M2_REPO/org/rypt/f8/1.1/f8-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3.jar" sourcepath="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2.jar" sourcepath="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5.jar" sourcepath="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8.jar" sourcepath="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0.jar" sourcepath="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5.jar" sourcepath="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1.jar" sourcepath="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23.jar" sourcepath="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-cli"/>
+  <classpathentry kind="src" path="/apache-any23-office-scraper"/>
+  <classpathentry kind="src" path="/apache-any23-html-scraper"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/nekohtml/nekohtml/1.9.22/nekohtml-1.9.22.jar" sourcepath="M2_REPO/net/sourceforge/nekohtml/nekohtml/1.9.22/nekohtml-1.9.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/uci/ics/crawler4j/4.4.0/crawler4j-4.4.0.jar" sourcepath="M2_REPO/edu/uci/ics/crawler4j/4.4.0/crawler4j-4.4.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/checkerframework/checker-compat-qual/2.0.0/checker-compat-qual-2.0.0.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-logging/commons-logging/1.2/commons-logging-1.2.jar" sourcepath="M2_REPO/commons-logging/commons-logging/1.2/commons-logging-1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sleepycat/je/5.0.84/je-5.0.84.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17.jar" sourcepath="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/junit/junit/4.12/junit-4.12.jar" sourcepath="M2_REPO/junit/junit/4.12/junit-4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar" sourcepath="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-sources.jar"/>
+</classpath>
\ No newline at end of file
diff --git a/basic-crawler/.project b/basic-crawler/.project
new file mode 100755
index 0000000..3b77bbc
--- /dev/null
+++ b/basic-crawler/.project
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+  <name>apache-any23-basic-crawler</name>
+  <comment>Any23 plugin for crawling sites. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse.</comment>
+  <projects>
+    <project>apache-any23-core</project>
+    <project>apache-any23-api</project>
+    <project>apache-any23-csvutils</project>
+    <project>apache-any23-mime</project>
+    <project>apache-any23-encoding</project>
+    <project>apache-any23-cli</project>
+    <project>apache-any23-office-scraper</project>
+    <project>apache-any23-html-scraper</project>
+  </projects>
+  <buildSpec>
+    <buildCommand>
+      <name>org.eclipse.jdt.core.javabuilder</name>
+    </buildCommand>
+    <buildCommand>
+      <name>org.eclipse.m2e.core.maven2Builder</name>
+    </buildCommand>
+  </buildSpec>
+  <natures>
+    <nature>org.eclipse.jdt.core.javanature</nature>
+    <nature>org.eclipse.m2e.core.maven2Nature</nature>
+  </natures>
+</projectDescription>
\ No newline at end of file
diff --git a/basic-crawler/.settings/org.eclipse.core.resources.prefs b/basic-crawler/.settings/org.eclipse.core.resources.prefs
new file mode 100755
index 0000000..29abf99
--- /dev/null
+++ b/basic-crawler/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/basic-crawler/.settings/org.eclipse.jdt.core.prefs b/basic-crawler/.settings/org.eclipse.jdt.core.prefs
new file mode 100755
index 0000000..b8947ec
--- /dev/null
+++ b/basic-crawler/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/basic-crawler/.settings/org.eclipse.m2e.core.prefs b/basic-crawler/.settings/org.eclipse.m2e.core.prefs
new file mode 100755
index 0000000..f897a7f
--- /dev/null
+++ b/basic-crawler/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/basic-crawler/pom.xml b/basic-crawler/pom.xml
new file mode 100644
index 0000000..685a1e7
--- /dev/null
+++ b/basic-crawler/pom.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.4-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.any23.plugins</groupId>
+  <artifactId>apache-any23-basic-crawler</artifactId>
+
+  <name>Apache Any23 :: Plugins :: Basic Crawler</name>
+  <description>Any23 plugin for crawling sites.</description>
+
+  <dependencies>
+    <!-- RDF4J -->
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-model</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Any23 Core. -->
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>2.4-SNAPSHOT</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>2.4-SNAPSHOT</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-cli</artifactId>
+      <version>2.4-SNAPSHOT</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-cli</artifactId>
+      <version>2.4-SNAPSHOT</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    
+    <!-- Crawler4j -->
+    <dependency>
+      <groupId>edu.uci.ics</groupId>
+      <artifactId>crawler4j</artifactId>
+      <version>4.4.0</version>
+      <type>jar</type>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-classic</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-core</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- The CLI interfaces -->
+    <dependency>
+      <groupId>com.beust</groupId>
+      <artifactId>jcommander</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    
+    <!-- Logging -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- BEGIN: Test Dependencies -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- END: Test Dependencies -->
+
+  </dependencies>
+
+  <build>
+
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <!-- Run each test class in a fresh forked JVM to avoid concurrency issues in Crawler4j's internal state. -->
+          <forkCount>1</forkCount>
+          <reuseForks>false</reuseForks>
+        </configuration>
+      </plugin>
+
+      <!-- Generates the distribution package -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <appendAssemblyId>false</appendAssemblyId>
+          <descriptors>
+            <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
+          </descriptors>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.0.0</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <manifestEntries>
+                    <Main-Class>org.apache.any23.cli.Crawler</Main-Class>
+                    <Build-Number>${implementation.build}</Build-Number>
+                  </manifestEntries>
+                </transformer>
+              </transformers>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <finalName>${project.artifactId}-uber-${project.version}</finalName>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+  </profiles>
+
+</project>
diff --git a/basic-crawler/src/main/assembly/LICENSE-with-deps.txt b/basic-crawler/src/main/assembly/LICENSE-with-deps.txt
new file mode 100644
index 0000000..55ffc5a
--- /dev/null
+++ b/basic-crawler/src/main/assembly/LICENSE-with-deps.txt
@@ -0,0 +1,251 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+APACHE ANY23 DEPENDENCIES:
+
+The Apache Any23 distribution packages include a number of dependencies with
+separate copyright notices and license terms. Your use of the source
+code for these dependencies is subject to the terms and
+conditions of the following licenses.
+
+For the Apache HttpClient component (http://hc.apache.org/httpcomponents-client-ga/)
+This is licensed under the Apache Software License, Version 2.0, see above
+
+For the crawler4j component (http://code.google.com/p/crawler4j/)
+This is licensed under the Apache Software License, Version 2.0, see above
+
+For the sleepycat component (http://www.oracle.com/database/berkeley-db/index.html)
+Copyright (c) 2002, 2011 Oracle and/or its affiliates.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. Redistributions in any form must be accompanied by information on
+   how to obtain complete source code for the Oracle Berkeley DB
+   Java Edition software and any accompanying software that uses the
+   Oracle Berkeley DB Java Edition software.  The source code must
+   either be included in the distribution or be available for no
+   more than the cost of distribution plus a nominal fee, and must be
+   freely redistributable under reasonable conditions.  For an
+   executable file, complete source code means the source code for all
+   modules it contains.  It does not include source code for modules or
+   files that typically accompany the major components of the operating
+   system on which the executable file runs.
+
+THIS SOFTWARE IS PROVIDED BY ORACLE ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
+NON-INFRINGEMENT, ARE DISCLAIMED.  IN NO EVENT SHALL ORACLE BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/basic-crawler/src/main/assembly/NOTICE-with-deps.txt b/basic-crawler/src/main/assembly/NOTICE-with-deps.txt
new file mode 100644
index 0000000..d28669a
--- /dev/null
+++ b/basic-crawler/src/main/assembly/NOTICE-with-deps.txt
@@ -0,0 +1,12 @@
+Apache Any23
+Copyright 2011-2017 The Apache Software Foundation
+Copyright 2008-2011 Digital Enterprise Research Institute (DERI)
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
+
+This product includes software developed by
+UniMI (http://www.unimi.it/)
+
+This product includes software developed by
+Yasser Ganjisaffar (http://code.google.com/p/crawler4j/)
diff --git a/basic-crawler/src/main/assembly/README.txt b/basic-crawler/src/main/assembly/README.txt
new file mode 100644
index 0000000..0b3bf28
--- /dev/null
+++ b/basic-crawler/src/main/assembly/README.txt
@@ -0,0 +1,77 @@
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+${project.name} (${implementation.build}; ${maven.build.timestamp})
+
+  What is it?
+  -----------
+
+  ${project.description}
+
+  Documentation
+  -------------
+
+  The most up-to-date documentation can be found at ${project.parent.url}.
+
+  Release Notes
+  -------------
+
+  The full list of changes can be found at ${project.parent.url}/changes-report.html.
+
+  System Requirements
+  -------------------
+
+  JDK:
+    ${javac.target.version} or above. (see http://www.oracle.com/technetwork/java/)
+  Memory:
+    No minimum requirement.
+  Disk:
+    No minimum requirement.
+  Operating System:
+    No minimum requirement. On Windows, Windows NT and above or Cygwin is required for
+    the startup scripts. Tested on Windows XP, Fedora Core and Mac OS X.
+
+  Installing Apache Any23
+  -----------------------
+
+** Windows 2000/XP
+
+  1) Unzip the distribution archive, i.e. apache-${project.build.finalName}-bin.zip
+        The subdirectory apache-${project.build.finalName} will be created from the archive.
+
+  2) Copy the jar files into C:\Documents and Settings\<username>\.any23\plugins
+
+** Unix-based Operating Systems (Linux, Solaris and Mac OS X)
+
+  1) Extract the distribution archive, i.e. apache-${project.build.finalName}-bin.tar.gz.
+        The subdirectory apache-${project.build.finalName} will be created from the archive.
+
+  2) Copy the jar files into ~/.any23/plugins
+
+  Licensing
+  ---------
+
+  Please see the file called LICENSE.txt
+
+  Apache Any23 URLs
+  -----------------
+
+  Home Page:          ${project.parent.url}/
+  Downloads:          ${project.parent.url}/download.html
+  Release Notes:      ${project.parent.url}/changes-report.html
+  Mailing Lists:      ${project.parent.url}/mail-lists.html
+  Source Code:        ${project.parent.scm.url}
+  Issue Tracking:     ${project.issueManagement.url}
+  Available Plugins:  ${project.parent.url}/plugins.html
diff --git a/basic-crawler/src/main/assembly/bin.xml b/basic-crawler/src/main/assembly/bin.xml
new file mode 100644
index 0000000..6617afb
--- /dev/null
+++ b/basic-crawler/src/main/assembly/bin.xml
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.1 http://maven.apache.org/xsd/assembly-1.1.1.xsd">
+
+  <!-- Binary distribution of the plugin: tar.gz and zip archives rooted at
+       ${project.build.finalName}. -->
+  <id>bin</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>zip</format>
+  </formats>
+  <includeBaseDirectory>true</includeBaseDirectory>
+  <baseDirectory>${project.build.finalName}</baseDirectory>
+
+  <!-- Use the N&L files which apply to the included dependencies.
+       Files are packaged read-only for group/others (0644) instead of world-writable. -->
+  <files>
+    <file>
+      <source>${basedir}/src/main/assembly/LICENSE-with-deps.txt</source>
+      <destName>LICENSE.txt</destName>
+      <outputDirectory/>
+      <fileMode>0644</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/src/main/assembly/NOTICE-with-deps.txt</source>
+      <destName>NOTICE.txt</destName>
+      <outputDirectory/>
+      <fileMode>0644</fileMode>
+    </file>
+    <file>
+      <!-- Filtered so that the ${...} placeholders in the README are resolved at build time. -->
+      <source>${basedir}/src/main/assembly/README.txt</source>
+      <filtered>true</filtered>
+      <outputDirectory/>
+      <fileMode>0644</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/../RELEASE-NOTES.txt</source>
+      <outputDirectory/>
+      <fileMode>0644</fileMode>
+    </file>
+  </files>
+
+  <!-- Plugin jar plus the crawler-specific runtime dependencies. The output directory
+       is relative to the archive root; a leading '/' is not cross-platform compatible. -->
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>true</useProjectArtifact>
+      <outputDirectory>lib</outputDirectory>
+      <includes>
+        <include>${project.groupId}:${project.artifactId}</include>
+        <include>edu.uci.ics:crawler4j</include>
+        <include>org.apache.httpcomponents:*</include>
+        <include>com.sleepycat:*</include>
+      </includes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
diff --git a/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java b/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
new file mode 100644
index 0000000..66b167b
--- /dev/null
+++ b/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.beust.jcommander.Parameters;
+import com.beust.jcommander.converters.FileConverter;
+import edu.uci.ics.crawler4j.crawler.Page;
+import edu.uci.ics.crawler4j.parser.HtmlParseData;
+import edu.uci.ics.crawler4j.parser.ParseData;
+import org.apache.any23.plugin.crawler.CrawlerListener;
+import org.apache.any23.plugin.crawler.SiteCrawler;
+import org.apache.any23.source.StringDocumentSource;
+
+import java.io.File;
+import java.net.URL;
+import java.util.UUID;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import static java.lang.String.format;
+
+/**
+ * Implementation of a <b>CLI crawler</b> based on {@link Rover}.
+ *
+ * <p>The tool takes exactly one seed IRI, crawls it through a {@link SiteCrawler}
+ * and runs the inherited {@link Rover} extraction on every visited HTML page.
+ * The extraction reports are printed to {@code System.err} when the JVM shuts down.</p>
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+@Parameters(commandNames = "crawler", commandDescription = "Any23 Crawler Command Line Tool.")
+public class Crawler extends Rover {
+
+    /**
+     * Value held by {@link #politenessDelay} when the option was not given on the
+     * command line; in that case the {@link SiteCrawler} setting is left untouched.
+     */
+    private static final int POLITENESS_DELAY_UNSET = Integer.MAX_VALUE;
+
+    /**
+     * Guards the inherited extraction so that only one page is processed at a time.
+     */
+    private final Object roverLock = new Object();
+
+    /**
+     * URLs fully matching this pattern are skipped by the crawler.
+     */
+    @Parameter(
+       names = { "-pf", "--pagefilter" },
+       description = "Regex used to filter out page URLs during crawling.",
+       converter = PatterConverter.class
+    )
+    private Pattern pageFilter = Pattern.compile( SiteCrawler.DEFAULT_PAGE_FILTER_RE );
+
+    /**
+     * Crawler working directory; defaults to a randomly named folder under {@code java.io.tmpdir}.
+     */
+    @Parameter(
+       names = { "-sf", "--storagefolder" },
+       description = "Folder used to store crawler temporary data.",
+       converter = FileConverter.class
+    )
+    private File storageFolder = new File(System.getProperty("java.io.tmpdir"), "crawler-metadata-" + UUID.randomUUID().toString());
+
+    @Parameter(names = { "-nc", "--numcrawlers" }, description = "Sets the number of crawlers.")
+    private int numCrawlers = SiteCrawler.DEFAULT_NUM_OF_CRAWLERS;
+
+    @Parameter(names = { "-mp", "--maxpages" }, description = "Max number of pages before interrupting crawl.")
+    private int maxPages = Integer.MAX_VALUE;
+
+    @Parameter(names = { "-md", "--maxdepth" }, description = "Max allowed crawler depth.")
+    private int maxDepth = Integer.MAX_VALUE;
+
+    @Parameter(names = { "-pd", "--politenessdelay" }, description = "Politeness delay in milliseconds.")
+    private int politenessDelay = POLITENESS_DELAY_UNSET;
+
+    /**
+     * Configures the underlying {@link Rover}, validates the seed and the storage
+     * folder, then starts the crawl.
+     *
+     * @throws IllegalArgumentException if the number of input IRIs is not exactly one.
+     * @throws IllegalStateException if the storage folder is a regular file or cannot be created.
+     * @throws Exception if the configuration, the seed URL parsing or the crawl fails.
+     */
+    @Override
+    public void run() throws Exception {
+        super.configure();
+
+        if (inputIRIs.size() != 1) {
+            throw new IllegalArgumentException("Expected just one seed.");
+        }
+        final URL seed = new URL(inputIRIs.get( 0 ));
+
+        if ( storageFolder.isFile() ) {
+            throw new IllegalStateException( format( "Storage folder %s can not be a file, must be a directory",
+                                                     storageFolder ) );
+        }
+
+        if ( !storageFolder.exists() ) {
+            if ( !storageFolder.mkdirs() ) {
+                throw new IllegalStateException(
+                        format( "Storage folder %s can not be created, please verify you have enough permissions",
+                                                         storageFolder ) );
+            }
+        }
+
+        final SiteCrawler siteCrawler = new SiteCrawler( storageFolder );
+        siteCrawler.setNumOfCrawlers( numCrawlers );
+        siteCrawler.setMaxPages( maxPages );
+        siteCrawler.setMaxDepth( maxDepth );
+        // Forward the delay only when the user supplied one: the field default is
+        // Integer.MAX_VALUE, which as a real delay would stall the crawl for
+        // roughly 24 days between two requests.
+        if (politenessDelay != POLITENESS_DELAY_UNSET) {
+            siteCrawler.setPolitenessDelay(politenessDelay);
+        }
+
+        siteCrawler.addListener(new CrawlerListener() {
+            @Override
+            public void visitedPage(Page page) {
+                final String pageURL = page.getWebURL().getURL();
+                System.err.println( format("Processing page: [%s]", pageURL) );
+
+                // Only HTML pages carry markup that can be handed to the extraction.
+                final ParseData parseData = page.getParseData();
+                if (parseData instanceof HtmlParseData) {
+                    final HtmlParseData htmlParseData = (HtmlParseData) parseData;
+                    try {
+                        synchronized (roverLock) {
+                            Crawler.super.performExtraction(
+                                    new StringDocumentSource(
+                                            htmlParseData.getHtml(),
+                                            pageURL
+
+                                    )
+                            );
+                        }
+                    } catch (Exception e) {
+                        // Best effort: a failing page is reported and the crawl goes on.
+                        System.err.println(format("Error while processing page [%s], error: %s .",
+                                                  pageURL, e.getMessage())
+                        );
+                    }
+                }
+            }
+        });
+
+        // Print the extraction reports even when the crawl is interrupted (e.g. CTRL-C).
+        Runtime.getRuntime().addShutdownHook( new Thread() {
+            @Override
+            public void run() {
+                try {
+                    System.err.println( Crawler.super.printReports() );
+                    // siteCrawler.stop(); // TODO: cause shutdown hanging.
+                } catch (Exception e) {
+                    e.printStackTrace(System.err);
+                }
+            }
+        });
+        // NOTE(review): the last argument presumably requests a blocking crawl - confirm against SiteCrawler#start.
+        siteCrawler.start(seed, pageFilter, true);
+    }
+
+    /**
+     * JCommander converter turning the {@code --pagefilter} argument into a compiled {@link Pattern}.
+     */
+    public static final class PatterConverter implements IStringConverter<Pattern> {
+
+        /**
+         * @param value the regular expression given on the command line.
+         * @return the compiled pattern.
+         * @throws ParameterException if {@code value} is not a valid regular expression.
+         */
+        @Override
+        public Pattern convert( String value ) {
+            try {
+                return Pattern.compile( value );
+            } catch (PatternSyntaxException pse) {
+                throw new ParameterException( format("Invalid page filter, '%s' must be a regular expression.", value) );
+            }
+        }
+
+    }
+
+}
diff --git a/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/CrawlerListener.java b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/CrawlerListener.java
new file mode 100644
index 0000000..31a492b
--- /dev/null
+++ b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/CrawlerListener.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.crawler;
+
+import edu.uci.ics.crawler4j.crawler.Page;
+
+/**
+ * Defines a listener for a {@link SiteCrawler}: implementations receive the
+ * pages handled by the crawler.
+ *
+ * <p>NOTE(review): the CLI implementation synchronizes its work inside
+ * {@link #visitedPage(Page)}, which suggests the callback may be invoked from
+ * several crawler threads - confirm against {@link SiteCrawler}.</p>
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public interface CrawlerListener {
+
+    /**
+     * Notifies the listener that a page has been visited by the crawler.
+     *
+     * @param page the page data.
+     */
+    void visitedPage(Page page);
+
+}
diff --git a/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java
new file mode 100644
index 0000000..2e43445
--- /dev/null
+++ b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/DefaultWebCrawler.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.crawler;
+
+import edu.uci.ics.crawler4j.crawler.Page;
+import edu.uci.ics.crawler4j.crawler.WebCrawler;
+import edu.uci.ics.crawler4j.url.WebURL;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.regex.Pattern;
+
+/**
+ * Default {@link WebCrawler} implementation: it keeps the crawl under the
+ * configured seed, skips the URLs matching the page filter and forwards every
+ * visited page to the {@link SharedData} instance.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class DefaultWebCrawler extends WebCrawler {
+
+    private static final Logger logger = LoggerFactory.getLogger(DefaultWebCrawler.class);
+
+    /**
+     * Shared data reference; obtaining it fails with an {@link IllegalStateException}
+     * when the shared data has not been initialized yet.
+     */
+    private final SharedData sharedData = SharedData.getInstance();
+
+    /**
+     * Page filter pattern; URLs fully matching it are not visited.
+     */
+    private final Pattern pattern = sharedData.getPattern();
+
+    /**
+     * Decides whether the given URL has to be crawled. A URL is accepted when the
+     * parent implementation accepts it, it starts with the crawl seed (compared
+     * ignoring case) and its lower-cased form does not match the page filter.
+     *
+     * @param referringPage the page in which the URL was found.
+     * @param url the candidate URL.
+     * @return <code>true</code> if the URL must be visited, <code>false</code> otherwise.
+     */
+    @Override
+    public boolean shouldVisit(Page referringPage, WebURL url) {
+        if (!super.shouldVisit(referringPage, url)) {
+            return false;
+        }
+        final String rawUrl = url.getURL();
+        if (rawUrl == null) {
+            return false;
+        }
+        // Locale.ROOT keeps the lower-casing independent from the JVM default locale
+        // (e.g. the Turkish dotless 'i' would break the extension based filter).
+        final String href = rawUrl.toLowerCase(java.util.Locale.ROOT);
+        // The seed must be normalized like the URL, otherwise a seed containing
+        // upper-case characters would never be a prefix of the lower-cased URL.
+        final String seedPrefix = sharedData.getSeed().toLowerCase(java.util.Locale.ROOT);
+        if (!href.startsWith(seedPrefix)) {
+            return false;
+        }
+        return pattern == null || !pattern.matcher(href).matches();
+    }
+
+    /**
+     * Forwards the visited page to the shared data, which is in charge of
+     * notifying it.
+     *
+     * @param page the page that has been fetched by the crawler.
+     */
+    @Override
+    public void visit(Page page) {
+        logger.trace("Visiting page: {}", page.getWebURL().getURL());
+        sharedData.notifyPage(page);
+    }
+
+}
+
diff --git a/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java
new file mode 100644
index 0000000..d1d5ca2
--- /dev/null
+++ b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SharedData.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.crawler;
+
+import edu.uci.ics.crawler4j.crawler.Page;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * This class hosts shared data structures accessible
+ * to all the {@link DefaultWebCrawler} instances
+ * run by the {@link SiteCrawler}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class SharedData {
+
+    /**
+     * Singleton instance; volatile so that crawler threads always observe the
+     * instance most recently published by {@link #setCrawlData}.
+     */
+    private static volatile SharedData instance;
+
+    /**
+     * Crawl seed.
+     */
+    private final String seed;
+
+    /**
+     * Crawl page filter pattern, can be <code>null</code>.
+     */
+    private final Pattern pattern;
+
+    /**
+     * List of crawler listeners.
+     */
+    private final List<CrawlerListener> listeners;
+
+    /**
+     * @return the singleton instance.
+     * @throws IllegalStateException if {@link #setCrawlData} has not been invoked yet.
+     */
+    protected static SharedData getInstance() {
+        if(instance == null) throw new IllegalStateException("The configuration has not yet initialized.");
+        return instance;
+    }
+
+    /**
+     * Initializes the crawler data, replacing any previously configured instance.
+     *
+     * @param seed crawler seed.
+     * @param regex page filter regex.
+     * @param listeners the listeners to be notified of the crawler activity.
+     * @throws IllegalArgumentException if the seed is blank or the listeners are <code>null</code>.
+     */
+    protected static void setCrawlData(String seed, Pattern regex, List<CrawlerListener> listeners) {
+        instance = new SharedData(seed, regex, listeners);
+    }
+
+    /**
+     * Internal constructor.
+     *
+     * @param seed crawl seed, must contain at least one non-whitespace character.
+     * @param pattern page filter pattern.
+     * @param listeners listeners to notify; rejected here if <code>null</code> instead of failing during the crawl.
+     */
+    private SharedData(String seed, Pattern pattern, List<CrawlerListener> listeners) {
+        if(seed == null || seed.trim().isEmpty())
+            throw new IllegalArgumentException(
+                String.format("Invalid seed '%s'", seed)
+            );
+        if(listeners == null)
+            throw new IllegalArgumentException("Invalid listeners, cannot be null");
+
+        this.seed      = seed;
+        this.pattern   = pattern;
+        this.listeners = listeners;
+    }
+
+    /**
+     * @return crawl seed.
+     */
+    protected String getSeed() {
+        return seed;
+    }
+
+    /**
+     * @return page filter pattern.
+     */
+    protected Pattern getPattern() {
+        return pattern;
+    }
+
+    /**
+     * Notifies all listeners, in list order, that a page has been visited.
+     *
+     * @param page the visited page.
+     */
+    protected void notifyPage(Page page) {
+        for(CrawlerListener listener : listeners) {
+            listener.visitedPage(page);
+        }
+    }
+
+}
diff --git a/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SiteCrawler.java b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SiteCrawler.java
new file mode 100644
index 0000000..7fd0d48
--- /dev/null
+++ b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/SiteCrawler.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.crawler;
+
+import edu.uci.ics.crawler4j.crawler.CrawlConfig;
+import edu.uci.ics.crawler4j.crawler.CrawlController;
+import edu.uci.ics.crawler4j.crawler.WebCrawler;
+import edu.uci.ics.crawler4j.fetcher.PageFetcher;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
+
+import java.io.File;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.regex.Pattern;
+
+/**
+ * A basic <em>site crawler</em> to extract semantic content
+ * of small/medium size sites.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class SiteCrawler {
+
+    /** Default regular expression matching the URLs (style sheets, scripts, media, archives...) to be skipped. */
+    public static final String DEFAULT_PAGE_FILTER_RE =
+        ".*(\\.(" +
+                    "css|js"                            +
+                    "|bmp|gif|jpe?g|png|tiff?"          +
+                    "|mid|mp2|mp3|mp4|wav|wma"          +
+                    "|avi|mov|mpeg|ram|m4v|wmv|rm|smil" +
+                    "|pdf"        +
+                    "|swf"        +
+                    "|zip|rar|gz" +
+                    "|xml|txt"    +
+        "))$";
+
+    /**
+     * Default number of crawler instances.
+     */
+    public static final int DEFAULT_NUM_OF_CRAWLERS = 10;
+
+    /**
+     * Default crawler implementation.
+     */
+    public static final Class<? extends WebCrawler> DEFAULT_WEB_CRAWLER = DefaultWebCrawler.class;
+
+    /**
+     * Default filter applied to skip contents.
+     */
+    public final Pattern defaultFilters = Pattern.compile(DEFAULT_PAGE_FILTER_RE);
+
+    /**
+     * The crawler threads controller.
+     */
+    private final CrawlController controller;
+
+    /**
+     * Crawler listeners; copy-on-write so that they can be (de)registered safely while a crawl iterates them.
+     */
+    private final List<CrawlerListener> listeners = new java.util.concurrent.CopyOnWriteArrayList<CrawlerListener>();
+
+    /**
+     * Actual number of crawler instances.
+     */
+    private int numOfCrawlers = DEFAULT_NUM_OF_CRAWLERS;
+
+    /**
+     * Actual web crawler.
+     */
+    private Class<? extends WebCrawler> webCrawler = DEFAULT_WEB_CRAWLER;
+
+    /**
+     * Internal crawler configuration.
+     */
+    private final CrawlConfig crawlConfig;
+
+    /**
+     * Internal executor service, <code>null</code> until a background crawl is started.
+     */
+    private ExecutorService service;
+
+    /**
+     * Creates a site crawler identifying itself with the <i>Apache Any23 Web Crawler</i> user agent.
+     * @param storageFolder location used to store the temporary data structures used by the crawler.
+     * @throws IllegalArgumentException if the crawler controller cannot be initialized.
+     */
+    public SiteCrawler(File storageFolder) {
+        try {
+            crawlConfig = new CrawlConfig();
+            crawlConfig.setCrawlStorageFolder( storageFolder.getAbsolutePath() );
+            crawlConfig.setUserAgentString("Apache Any23 Web Crawler");
+            final PageFetcher pageFetcher = new PageFetcher(crawlConfig);
+            final RobotstxtServer robotstxtServer = new RobotstxtServer(new RobotstxtConfig(), pageFetcher);
+            controller = new CrawlController(crawlConfig, pageFetcher, robotstxtServer);
+        } catch (Exception e) {
+            throw new IllegalArgumentException("Error while initializing crawler controller.", e);
+        }
+    }
+
+    /**
+     * @return number of crawler instances.
+     */
+    public int getNumOfCrawlers() {
+        return numOfCrawlers;
+    }
+
+    /**
+     * Sets the number of crawler instances.
+     *
+     * @param n an integer &gt; 0.
+     */
+    public void setNumOfCrawlers(int n) {
+        if(n <=0) throw new IllegalArgumentException("Invalid number of crawlers, must be > 0 .");
+        this.numOfCrawlers = n;
+    }
+
+    /** @return the crawler class instantiated for every crawler instance. */
+    public Class<? extends WebCrawler> getWebCrawler() {
+        return webCrawler;
+    }
+
+    /**
+     * Sets the actual crawler class.
+     *
+     * @param c a not <code>null</code> crawler class.
+     */
+    public void setWebCrawler(Class<? extends WebCrawler> c) {
+        if(c == null) throw new NullPointerException("c cannot be null.");
+        this.webCrawler = c;
+    }
+
+    /**
+     * @return the max allowed crawl depth, <code>-1</code> means no limit.
+     */
+    public int getMaxDepth() {
+        return crawlConfig.getMaxDepthOfCrawling();
+    }
+
+    /**
+     * Sets the maximum depth.
+     *
+     * @param maxDepth maximum allowed depth. <code>-1</code> means no limit.
+     */
+    public void setMaxDepth(int maxDepth) {
+        if(maxDepth < -1 || maxDepth == 0) throw new IllegalArgumentException("Invalid maxDepth, must be -1 or > 0");
+        crawlConfig.setMaxDepthOfCrawling(maxDepth);
+    }
+
+    /**
+     * @return max number of allowed pages.
+     */
+    public int getMaxPages() {
+        return crawlConfig.getMaxPagesToFetch();
+    }
+
+    /**
+     * Sets the maximum collected pages.
+     *
+     * @param maxPages maximum allowed pages. <code>-1</code> means no limit.
+     */
+    public void setMaxPages(int maxPages) {
+        if(maxPages < -1 || maxPages == 0) throw new IllegalArgumentException("Invalid maxPages, must be -1 or > 0");
+        crawlConfig.setMaxPagesToFetch(maxPages);
+    }
+
+    /**
+     * @return the politeness delay in milliseconds.
+     */
+    public int getPolitenessDelay() {
+        return crawlConfig.getPolitenessDelay();
+    }
+
+    /**
+     * Sets the politeness delay.
+     *
+     * @param millis delay in milliseconds; negative values are ignored and leave the current delay untouched.
+     */
+    public void setPolitenessDelay(int millis) {
+        if(millis >= 0) crawlConfig.setPolitenessDelay(millis);
+    }
+
+    /**
+     * Registers a {@link CrawlerListener} to this crawler.
+     *
+     * @param listener a {@link org.apache.any23.plugin.crawler.CrawlerListener}
+     * implementation which listens for page discovery
+     */
+    public void addListener(CrawlerListener listener) {
+        listeners.add(listener);
+    }
+
+    /**
+     * Deregisters a {@link CrawlerListener} from this crawler.
+     *
+     * @param listener a {@link org.apache.any23.plugin.crawler.CrawlerListener}
+     * implementation which listens for page discovery
+     */
+    public void removeListener(CrawlerListener listener) {
+        listeners.remove(listener);
+    }
+
+    /**
+     * Starts the crawling process. A second background (<code>wait == false</code>) run is rejected before any state changes.
+     *
+     * @param seed the starting URL for the crawler process.
+     * @param filters filters to be applied to the crawler process. Can be <code>null</code>.
+     * @param wait if <code>true</code> the process will wait for the crawler termination.
+     * @throws Exception if an error occurred during crawler initiation
+     */
+    public synchronized void start(
+            final URL seed, final Pattern filters, final boolean wait
+    ) throws Exception {
+        if(!wait && service != null) throw new IllegalStateException("Another service seems to run.");
+        SharedData.setCrawlData(seed.toExternalForm(), filters, Collections.unmodifiableList(listeners));
+        controller.addSeed(seed.toExternalForm());
+        final Runnable internalRunnable = new Runnable() {
+            @Override
+            public void run() {
+                controller.start(getWebCrawler(), getNumOfCrawlers());
+            }
+        };
+        if(wait) {
+            internalRunnable.run();
+        } else {
+            service = Executors.newSingleThreadExecutor();
+            service.execute(internalRunnable);
+            // Accept no further tasks, so that the worker thread terminates once the crawl is over.
+            service.shutdown();
+        }
+    }
+
+    /**
+     * Starts the crawler process with the {@link #defaultFilters}.
+     *
+     * @param seed the starting URL for the crawler process.
+     * @param wait if <code>true</code> the process will wait for the crawler termination.
+     * @throws Exception if an error occurred during crawler initiation
+     */
+    public void start(final URL seed, final boolean wait) throws Exception {
+        start(seed, defaultFilters, wait);
+    }
+
+    /**
+     * Interrupts the crawler process if started with <code>wait</code> flag == <code>false</code>, otherwise does nothing.
+     */
+    public synchronized void stop() {
+        if(service != null) service.shutdownNow();
+    }
+
+}
diff --git a/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/package-info.java b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/package-info.java
new file mode 100644
index 0000000..450ba8b
--- /dev/null
+++ b/basic-crawler/src/main/java/org/apache/any23/plugin/crawler/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  <i>Commandline</i> extension of {@link org.apache.any23.cli.Rover} able
+ *  to extract semantic content from an entire <i>site</i>.
+ */
+package org.apache.any23.plugin.crawler;
diff --git a/basic-crawler/src/main/resources/META-INF/services/org.apache.any23.cli.Tool b/basic-crawler/src/main/resources/META-INF/services/org.apache.any23.cli.Tool
new file mode 100644
index 0000000..cd000e0
--- /dev/null
+++ b/basic-crawler/src/main/resources/META-INF/services/org.apache.any23.cli.Tool
@@ -0,0 +1 @@
+org.apache.any23.cli.Crawler
\ No newline at end of file
diff --git a/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java b/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
new file mode 100644
index 0000000..0e6cd29
--- /dev/null
+++ b/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.apache.any23.Any23OnlineTestBase;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.FileUtils;
+import org.junit.Test;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFHandlerException;
+import org.eclipse.rdf4j.rio.RDFParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test case for {@link Crawler} CLI.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class CrawlerTest extends Any23OnlineTestBase {
+
+    public static final Logger logger = LoggerFactory.getLogger(CrawlerTest.class);
+
+    @Test
+    public void testCLI() throws IOException, RDFHandlerException, RDFParseException {
+        assumeOnlineAllowed();
+
+        final File outFile = File.createTempFile("crawler-test", ".nq", tempDirectory);
+        outFile.delete();
+        logger.info( "Outfile: " + outFile.getAbsolutePath() );
+
+        final Future<?> future = Executors.newSingleThreadExecutor().submit(
+            new Runnable() {
+                @Override
+                public void run() {
+                    try {
+                        ToolRunner.main(
+                                String.format(
+                                        "crawler -f nquads --maxpages 50 --maxdepth 1 --politenessdelay 500 -o %s " +
+                                        "http://any23.apache.org",
+                                        outFile.getAbsolutePath()
+                                ).split(" ")
+                        );
+                    } catch (Exception e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        );
+
+        try {
+            future.get(10, TimeUnit.SECONDS);
+        } catch (Exception e) {
+            // OK.
+            if( ! (e instanceof TimeoutException) ) {
+                e.printStackTrace();
+            }
+        }
+        assertTrue("The output file has not been created.", outFile.exists());
+
+        final String[] lines = FileUtils.readFileLines(outFile);
+        final StringBuilder allLinesExceptLast = new StringBuilder();
+        for (int i = 0; i < lines.length - 1; i++) {
+            allLinesExceptLast.append(lines[i]);
+            allLinesExceptLast.append("\n");
+        }
+
+        final Statement[] statements = RDFUtils.parseRDF(RDFFormat.NQUADS, allLinesExceptLast.toString());
+        assertTrue(statements.length > 0);
+    }
+
+}
diff --git a/basic-crawler/src/test/java/org/apache/any23/plugin/crawler/SiteCrawlerTest.java b/basic-crawler/src/test/java/org/apache/any23/plugin/crawler/SiteCrawlerTest.java
new file mode 100644
index 0000000..5c7fc02
--- /dev/null
+++ b/basic-crawler/src/test/java/org/apache/any23/plugin/crawler/SiteCrawlerTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.crawler;
+
+import edu.uci.ics.crawler4j.crawler.Page;
+import org.apache.any23.Any23OnlineTestBase;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.net.URL;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Iterator;
+
+/**
+ * Test case for {@link SiteCrawler}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class SiteCrawlerTest extends Any23OnlineTestBase {
+
+    public static final Logger logger = LoggerFactory.getLogger(SiteCrawlerTest.class);
+
+    /**
+     * Tests the main crawler use case: a background crawl must notify at least one page within 15 seconds.
+     *
+     * @throws Exception if there is an error asserting test data
+     */
+    @Test
+    public void testSiteCrawling() throws Exception {
+        assumeOnlineAllowed();
+
+        File tmpFile = File.createTempFile("site-crawler-test", ".storage");
+        tmpFile.delete();
+
+        final SiteCrawler controller = new SiteCrawler(tmpFile);
+        controller.setMaxPages(100);
+        logger.info("Crawler4j: Setting max num of pages to: " + controller.getMaxPages());
+        controller.setPolitenessDelay(500);
+        logger.info("Crawler4j: Setting Politeness delay to: " + controller.getPolitenessDelay() + "ms");
+
+        // Synchronized set: the listener fills it from the crawling side while this thread reads its size.
+        final Set<String> distinctPages = java.util.Collections.synchronizedSet(new HashSet<String>());
+        controller.addListener(new CrawlerListener() {
+            @Override
+            public void visitedPage(Page page) {
+                final String pageURL = page.getWebURL().getURL();
+                distinctPages.add(pageURL);
+                logger.info("Crawler4j: Fetching page - " + pageURL);
+            }
+        });
+
+        controller.start( new URL("http://any23.apache.org/"), false);
+        // Let the background crawl run for a while; a plain sleep is not subject to spurious wake-ups.
+        try {
+            Thread.sleep(15 * 1000);
+        } finally {
+            controller.stop();
+        }
+
+        logger.info("Distinct pages: " + distinctPages.size());
+        Assert.assertTrue("Expected some page crawled.", distinctPages.size() > 0);
+    }
+
+}
diff --git a/basic-crawler/src/test/resources/log4j.properties b/basic-crawler/src/test/resources/log4j.properties
new file mode 100644
index 0000000..d715976
--- /dev/null
+++ b/basic-crawler/src/test/resources/log4j.properties
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootCategory=INFO, stdout
+
+log4j.appender.stdout.Threshold=INFO
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+log4j.appender.stdout.layout.ConversionPattern=%5p [%t] %m%n
diff --git a/html-scraper/.classpath b/html-scraper/.classpath
new file mode 100755
index 0000000..2f5b530
--- /dev/null
+++ b/html-scraper/.classpath
@@ -0,0 +1,200 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+  <classpathentry kind="src" path="src/test/java" output="target/test-classes" including="**/*.java"/>
+  <classpathentry kind="src" path="src/test/resources" output="target/test-classes" excluding="**/*.java"/>
+  <classpathentry kind="src" path="src/main/java" including="**/*.java"/>
+  <classpathentry kind="src" path="src/main/resources" excluding="**/*.java"/>
+  <classpathentry kind="src" path="target/maven-shared-archive-resources" excluding="**/*.java"/>
+  <classpathentry kind="output" path="target/classes"/>
+  <classpathentry kind="var" path="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0.jar" sourcepath="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2.jar" sourcepath="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/measure/unit-api/1.0/unit-api-1.0.jar" sourcepath="M2_REPO/javax/measure/unit-api/1.0/unit-api-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/inject/javax.inject/1/javax.inject-1.jar" sourcepath="M2_REPO/javax/inject/javax.inject/1/javax.inject-1-sources.jar"/>
+  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-core"/>
+  <classpathentry kind="src" path="/apache-any23-api"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-csvutils"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6.jar" sourcepath="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-mime"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6.jar" sourcepath="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2.jar" sourcepath="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1.jar" sourcepath="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16.jar" sourcepath="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1.jar" sourcepath="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0.jar" sourcepath="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar" sourcepath="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar" sourcepath="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/gagravarr/vorbis-java-tika/0.8/vorbis-java-tika-0.8.jar" sourcepath="M2_REPO/org/gagravarr/vorbis-java-tika/0.8/vorbis-java-tika-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/tallison/jmatio/1.5/jmatio-1.5.jar" sourcepath="M2_REPO/org/tallison/jmatio/1.5/jmatio-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18.jar" sourcepath="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/tukaani/xz/1.8/xz-1.8.jar" sourcepath="M2_REPO/org/tukaani/xz/1.8/xz-1.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11.jar" sourcepath="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2.jar" sourcepath="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/pdfbox/2.0.16/pdfbox-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/pdfbox/2.0.16/pdfbox-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/fontbox/2.0.16/fontbox-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/fontbox/2.0.16/fontbox-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/pdfbox-tools/2.0.16/pdfbox-tools-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/pdfbox-tools/2.0.16/pdfbox-tools-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jempbox/1.8.16/jempbox-1.8.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/jempbox/1.8.16/jempbox-1.8.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcmail-jdk15on/1.62/bcmail-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcmail-jdk15on/1.62/bcmail-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcprov-jdk15on/1.62/bcprov-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcprov-jdk15on/1.62/bcprov-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcpkix-jdk15on/1.62/bcpkix-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcpkix-jdk15on/1.62/bcpkix-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar" sourcepath="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar" sourcepath="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06.jar" sourcepath="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/healthmarketscience/jackcess/jackcess/3.0.1/jackcess-3.0.1.jar" sourcepath="M2_REPO/com/healthmarketscience/jackcess/jackcess/3.0.1/jackcess-3.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/healthmarketscience/jackcess/jackcess-encrypt/3.0.0/jackcess-encrypt-3.0.0.jar" sourcepath="M2_REPO/com/healthmarketscience/jackcess/jackcess-encrypt/3.0.0/jackcess-encrypt-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1.jar" sourcepath="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ow2/asm/asm/7.2-beta/asm-7.2-beta.jar" sourcepath="M2_REPO/org/ow2/asm/asm/7.2-beta/asm-7.2-beta-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/mp4parser/isoparser/1.1.22/isoparser-1.1.22.jar" sourcepath="M2_REPO/com/googlecode/mp4parser/isoparser/1.1.22/isoparser-1.1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/drewnoakes/metadata-extractor/2.11.0/metadata-extractor-2.11.0.jar" sourcepath="M2_REPO/com/drewnoakes/metadata-extractor/2.11.0/metadata-extractor-2.11.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/adobe/xmp/xmpcore/5.1.3/xmpcore-5.1.3.jar" sourcepath="M2_REPO/com/adobe/xmp/xmpcore/5.1.3/xmpcore-5.1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/de/l3s/boilerpipe/boilerpipe/1.1.0/boilerpipe-1.1.0.jar" sourcepath="M2_REPO/de/l3s/boilerpipe/boilerpipe/1.1.0/boilerpipe-1.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/rometools/rome/1.12.1/rome-1.12.1.jar" sourcepath="M2_REPO/com/rometools/rome/1.12.1/rome-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/rometools/rome-utils/1.12.1/rome-utils-1.12.1.jar" sourcepath="M2_REPO/com/rometools/rome-utils/1.12.1/rome-utils-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6.jar" sourcepath="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/gagravarr/vorbis-java-core/0.8/vorbis-java-core-0.8.jar" sourcepath="M2_REPO/org/gagravarr/vorbis-java-core/0.8/vorbis-java-core-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/juniversalchardet/juniversalchardet/1.0.3/juniversalchardet-1.0.3.jar" sourcepath="M2_REPO/com/googlecode/juniversalchardet/juniversalchardet/1.0.3/juniversalchardet-1.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codelibs/jhighlight/1.0.3/jhighlight-1.0.3.jar" sourcepath="M2_REPO/org/codelibs/jhighlight/1.0.3/jhighlight-1.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/pff/java-libpst/0.8.1/java-libpst-0.8.1.jar" sourcepath="M2_REPO/com/pff/java-libpst/0.8.1/java-libpst-0.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/junrar/junrar/4.0.0/junrar-4.0.0.jar" sourcepath="M2_REPO/com/github/junrar/junrar/4.0.0/junrar-4.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-rs-client/3.3.2/cxf-rt-rs-client-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-rs-client/3.3.2/cxf-rt-rs-client-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-transports-http/3.3.2/cxf-rt-transports-http-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-transports-http/3.3.2/cxf-rt-transports-http-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-core/3.3.2/cxf-core-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-core/3.3.2/cxf-core-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3.jar" sourcepath="M2_REPO/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4.jar" sourcepath="M2_REPO/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ws/xmlschema/xmlschema-core/2.2.4/xmlschema-core-2.2.4.jar" sourcepath="M2_REPO/org/apache/ws/xmlschema/xmlschema-core/2.2.4/xmlschema-core-2.2.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-xjc/2.3.2/jaxb-xjc-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-xjc/2.3.2/jaxb-xjc-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/xsom/2.3.2/xsom-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/xsom/2.3.2/xsom-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/bind/external/relaxng-datatype/2.3.2/relaxng-datatype-2.3.2.jar" sourcepath="M2_REPO/com/sun/xml/bind/external/relaxng-datatype/2.3.2/relaxng-datatype-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/codemodel/2.3.2/codemodel-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/codemodel/2.3.2/codemodel-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/bind/external/rngom/2.3.2/rngom-2.3.2.jar" sourcepath="M2_REPO/com/sun/xml/bind/external/rngom/2.3.2/rngom-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/dtd-parser/dtd-parser/1.4.1/dtd-parser-1.4.1.jar" sourcepath="M2_REPO/com/sun/xml/dtd-parser/dtd-parser/1.4.1/dtd-parser-1.4.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-tools/3.0.8/istack-commons-tools-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-tools/3.0.8/istack-commons-tools-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ant/ant/1.10.5/ant-1.10.5.jar" sourcepath="M2_REPO/org/apache/ant/ant/1.10.5/ant-1.10.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ant/ant-launcher/1.10.5/ant-launcher-1.10.5.jar" sourcepath="M2_REPO/org/apache/ant/ant-launcher/1.10.5/ant-launcher-1.10.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-frontend-jaxrs/3.3.2/cxf-rt-frontend-jaxrs-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-frontend-jaxrs/3.3.2/cxf-rt-frontend-jaxrs-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/ws/rs/jakarta.ws.rs-api/2.1.5/jakarta.ws.rs-api-2.1.5.jar" sourcepath="M2_REPO/jakarta/ws/rs/jakarta.ws.rs-api/2.1.5/jakarta.ws.rs-api-2.1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-security/3.3.2/cxf-rt-security-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-security/3.3.2/cxf-rt-security-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-exec/1.3/commons-exec-1.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-exec/1.3/commons-exec-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/opennlp/opennlp-tools/1.9.1/opennlp-tools-1.9.1.jar" sourcepath="M2_REPO/org/apache/opennlp/opennlp-tools/1.9.1/opennlp-tools-1.9.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1.jar" sourcepath="M2_REPO/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11.jar" sourcepath="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/gson/gson/2.8.5/gson-2.8.5.jar" sourcepath="M2_REPO/com/google/code/gson/gson/2.8.5/gson-2.8.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/netcdf4/4.5.5/netcdf4-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/netcdf4/4.5.5/netcdf4-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/cdm/4.5.5/cdm-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/cdm/4.5.5/cdm-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/udunits/4.5.5/udunits-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/udunits/4.5.5/udunits-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/joda-time/joda-time/2.2/joda-time-2.2.jar" sourcepath="M2_REPO/joda-time/joda-time/2.2/joda-time-2.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/jcip/jcip-annotations/1.0/jcip-annotations-1.0.jar" sourcepath="M2_REPO/net/jcip/jcip-annotations/1.0/jcip-annotations-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/httpservices/4.5.5/httpservices-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/httpservices/4.5.5/httpservices-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/quartz-scheduler/quartz/2.2.0/quartz-2.2.0.jar" sourcepath="M2_REPO/org/quartz-scheduler/quartz/2.2.0/quartz-2.2.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0.jar" sourcepath="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/ehcache/ehcache-core/2.6.2/ehcache-core-2.6.2.jar" sourcepath="M2_REPO/net/sf/ehcache/ehcache-core/2.6.2/ehcache-core-2.6.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/itadaki/bzip2/0.9.1/bzip2-0.9.1.jar" sourcepath="M2_REPO/org/itadaki/bzip2/0.9.1/bzip2-0.9.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72.jar" sourcepath="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/guava/22.0/guava-22.0.jar" sourcepath="M2_REPO/com/google/guava/guava/22.0/guava-22.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/failureaccess/1.0.1/failureaccess-1.0.1.jar" sourcepath="M2_REPO/com/google/guava/failureaccess/1.0.1/failureaccess-1.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/listenablefuture/9999.0-empty-to-avoid-conflict-with-guava/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/findbugs/jsr305/3.0.2/jsr305-3.0.2.jar" sourcepath="M2_REPO/com/google/code/findbugs/jsr305/3.0.2/jsr305-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/checkerframework/checker-qual/2.8.1/checker-qual-2.8.1.jar" sourcepath="M2_REPO/org/checkerframework/checker-qual/2.8.1/checker-qual-2.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/errorprone/error_prone_annotations/2.3.2/error_prone_annotations-2.3.2.jar" sourcepath="M2_REPO/com/google/errorprone/error_prone_annotations/2.3.2/error_prone_annotations-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/j2objc/j2objc-annotations/1.3/j2objc-annotations-1.3.jar" sourcepath="M2_REPO/com/google/j2objc/j2objc-annotations/1.3/j2objc-annotations-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/mojo/animal-sniffer-annotations/1.17/animal-sniffer-annotations-1.17.jar" sourcepath="M2_REPO/org/codehaus/mojo/animal-sniffer-annotations/1.17/animal-sniffer-annotations-1.17-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/grib/4.5.5/grib-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/grib/4.5.5/grib-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1.jar" sourcepath="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar" sourcepath="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4.jar" sourcepath="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15.jar" sourcepath="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-utility/0.8/sis-utility-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-utility/0.8/sis-utility-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/opengis/geoapi/3.0.1/geoapi-3.0.1.jar" sourcepath="M2_REPO/org/opengis/geoapi/3.0.1/geoapi-3.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/storage/sis-netcdf/0.8/sis-netcdf-0.8.jar" sourcepath="M2_REPO/org/apache/sis/storage/sis-netcdf/0.8/sis-netcdf-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/storage/sis-storage/0.8/sis-storage-0.8.jar" sourcepath="M2_REPO/org/apache/sis/storage/sis-storage/0.8/sis-storage-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-feature/0.8/sis-feature-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-feature/0.8/sis-feature-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-metadata/0.8/sis-metadata-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-metadata/0.8/sis-metadata-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-referencing/0.8/sis-referencing-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-referencing/0.8/sis-referencing-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/usc/ir/sentiment-analysis-parser/0.1/sentiment-analysis-parser-0.1.jar" sourcepath="M2_REPO/edu/usc/ir/sentiment-analysis-parser/0.1/sentiment-analysis-parser-0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2.jar" sourcepath="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0.jar" sourcepath="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-encoding"/>
+  <classpathentry kind="var" path="M2_REPO/org/rypt/f8/1.1/f8-1.1.jar" sourcepath="M2_REPO/org/rypt/f8/1.1/f8-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3.jar" sourcepath="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2.jar" sourcepath="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5.jar" sourcepath="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8.jar" sourcepath="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0.jar" sourcepath="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5.jar" sourcepath="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1.jar" sourcepath="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23.jar" sourcepath="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/nekohtml/nekohtml/1.9.22/nekohtml-1.9.22.jar" sourcepath="M2_REPO/net/sourceforge/nekohtml/nekohtml/1.9.22/nekohtml-1.9.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17.jar" sourcepath="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/junit/junit/4.12/junit-4.12.jar" sourcepath="M2_REPO/junit/junit/4.12/junit-4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar" sourcepath="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/mockito/mockito-core/3.0.0/mockito-core-3.0.0.jar" sourcepath="M2_REPO/org/mockito/mockito-core/3.0.0/mockito-core-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/bytebuddy/byte-buddy/1.9.10/byte-buddy-1.9.10.jar" sourcepath="M2_REPO/net/bytebuddy/byte-buddy/1.9.10/byte-buddy-1.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/bytebuddy/byte-buddy-agent/1.9.10/byte-buddy-agent-1.9.10.jar" sourcepath="M2_REPO/net/bytebuddy/byte-buddy-agent/1.9.10/byte-buddy-agent-1.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/objenesis/objenesis/2.6/objenesis-2.6.jar" sourcepath="M2_REPO/org/objenesis/objenesis/2.6/objenesis-2.6-sources.jar"/>
+</classpath>
\ No newline at end of file
diff --git a/html-scraper/.project b/html-scraper/.project
new file mode 100755
index 0000000..75b5486
--- /dev/null
+++ b/html-scraper/.project
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+  <name>apache-any23-html-scraper</name>
+  <comment>Any23 plugin for scraping HTML code. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse.</comment>
+  <projects>
+    <project>apache-any23-core</project>
+    <project>apache-any23-api</project>
+    <project>apache-any23-csvutils</project>
+    <project>apache-any23-mime</project>
+    <project>apache-any23-encoding</project>
+  </projects>
+  <buildSpec>
+    <buildCommand>
+      <name>org.eclipse.jdt.core.javabuilder</name>
+    </buildCommand>
+    <buildCommand>
+      <name>org.eclipse.m2e.core.maven2Builder</name>
+    </buildCommand>
+  </buildSpec>
+  <natures>
+    <nature>org.eclipse.jdt.core.javanature</nature>
+    <nature>org.eclipse.m2e.core.maven2Nature</nature>
+  </natures>
+</projectDescription>
\ No newline at end of file
diff --git a/html-scraper/.settings/org.eclipse.core.resources.prefs b/html-scraper/.settings/org.eclipse.core.resources.prefs
new file mode 100755
index 0000000..29abf99
--- /dev/null
+++ b/html-scraper/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/html-scraper/.settings/org.eclipse.jdt.core.prefs b/html-scraper/.settings/org.eclipse.jdt.core.prefs
new file mode 100755
index 0000000..b8947ec
--- /dev/null
+++ b/html-scraper/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/html-scraper/.settings/org.eclipse.m2e.core.prefs b/html-scraper/.settings/org.eclipse.m2e.core.prefs
new file mode 100755
index 0000000..f897a7f
--- /dev/null
+++ b/html-scraper/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/html-scraper/pom.xml b/html-scraper/pom.xml
new file mode 100644
index 0000000..4ed4594
--- /dev/null
+++ b/html-scraper/pom.xml
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.4-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.any23.plugins</groupId>
+  <artifactId>apache-any23-html-scraper</artifactId>
+
+  <name>Apache Any23 :: Plugins :: HTML Scraper</name>
+  <description>Any23 plugin for scraping HTML code.</description>
+
+  <dependencies>
+    <!-- RDF4J -->
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-model</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Any23 Core. -->
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>2.4-SNAPSHOT</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- BEGIN: BoilerPipe -->
+    <dependency>
+      <groupId>net.sourceforge.nekohtml</groupId>
+      <artifactId>nekohtml</artifactId>
+      <version>1.9.22</version>
+    </dependency>
+    <dependency>
+      <groupId>xerces</groupId>
+      <artifactId>xercesImpl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>de.l3s.boilerpipe</groupId>
+      <artifactId>boilerpipe</artifactId>
+    </dependency>
+    <!-- END: BoilerPipe -->
+
+    <!-- Logging -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>${slf4j.logger.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- BEGIN: Test Dependencies -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- END: Test Dependencies -->
+
+  </dependencies>
+
+  <build>
+
+    <plugins>
+      <!-- Generates the distribution package -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <appendAssemblyId>false</appendAssemblyId>
+          <descriptors>
+            <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
+          </descriptors>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+  </profiles>
+
+</project>
diff --git a/html-scraper/src/main/assembly/LICENSE-with-deps.txt b/html-scraper/src/main/assembly/LICENSE-with-deps.txt
new file mode 100644
index 0000000..5010c05
--- /dev/null
+++ b/html-scraper/src/main/assembly/LICENSE-with-deps.txt
@@ -0,0 +1,212 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+APACHE ANY23 DEPENDENCIES:
+
+The Apache Any23 distribution packages include a number of dependencies with
+separate copyright notices and license terms. Your use of the source
+code for these dependencies is subject to the terms and
+conditions of the following licenses.
+
+For the boilerpipe component (http://code.google.com/p/boilerpipe/)
+This is licensed under the Apache Software License, Version 2.0, see above
diff --git a/html-scraper/src/main/assembly/NOTICE-with-deps.txt b/html-scraper/src/main/assembly/NOTICE-with-deps.txt
new file mode 100644
index 0000000..104dba8
--- /dev/null
+++ b/html-scraper/src/main/assembly/NOTICE-with-deps.txt
@@ -0,0 +1,9 @@
+Apache Any23
+Copyright 2011-2017 The Apache Software Foundation
+Copyright 2008-2011 Digital Enterprise Research Institute (DERI)
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
+
+This product includes software developed by
+Christian Kohlschuetter (http://code.google.com/p/boilerpipe/)
diff --git a/html-scraper/src/main/assembly/README.txt b/html-scraper/src/main/assembly/README.txt
new file mode 100644
index 0000000..0b3bf28
--- /dev/null
+++ b/html-scraper/src/main/assembly/README.txt
@@ -0,0 +1,77 @@
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+${project.name} (${implementation.build}; ${maven.build.timestamp})
+
+  What is it?
+  -----------
+
+  ${project.description}
+
+  Documentation
+  -------------
+
+  The most up-to-date documentation can be found at ${project.parent.url}.
+
+  Release Notes
+  -------------
+
+  The full list of changes can be found at ${project.parent.url}/changes-report.html.
+
+  System Requirements
+  -------------------
+
+  JDK:
+    ${javac.target.version} or above. (see http://www.oracle.com/technetwork/java/)
+  Memory:
+    No minimum requirement.
+  Disk:
+    No minimum requirement.
+  Operating System:
+    No minimum requirement. On Windows, Windows NT and above or Cygwin is required for
+    the startup scripts. Tested on Windows XP, Fedora Core and Mac OS X.
+
+  Installing Apache Any23
+  -----------------------
+
+** Windows 2000/XP
+
+  1) Unzip the distribution archive, i.e. ${project.build.finalName}.zip
+        The subdirectory ${project.build.finalName} will be created from the archive.
+
+  2) Copy the jar files under C:\Documents and Settings\<username>\.any23\plugins
+
+** Unix-based Operating Systems (Linux, Solaris and Mac OS X)
+
+  1) Extract the distribution archive, i.e. ${project.build.finalName}.tar.gz.
+        The subdirectory ${project.build.finalName} will be created from the archive.
+
+  2) Copy the jar files under ~/.any23/plugins
+
+  Licensing
+  ---------
+
+  Please see the file called LICENSE.txt
+
+  Apache Any23 URLs
+  -----------------
+
+  Home Page:          ${project.parent.url}/
+  Downloads:          ${project.parent.url}/download.html
+  Release Notes:      ${project.parent.url}/changes-report.html
+  Mailing Lists:      ${project.parent.url}/mail-lists.html
+  Source Code:        ${project.parent.scm.url}
+  Issue Tracking:     ${project.issueManagement.url}
+  Available Plugins:  ${project.parent.url}/plugins.html
diff --git a/html-scraper/src/main/assembly/bin.xml b/html-scraper/src/main/assembly/bin.xml
new file mode 100644
index 0000000..b94d7ba
--- /dev/null
+++ b/html-scraper/src/main/assembly/bin.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.1 http://maven.apache.org/xsd/assembly-1.1.1.xsd">
+
+  <id>bin</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>zip</format>
+  </formats>
+  <includeBaseDirectory>true</includeBaseDirectory>
+  <baseDirectory>${project.build.finalName}</baseDirectory>
+
+  <!-- Use the NOTICE and LICENSE files which apply to the included dependencies -->
+  <files>
+    <file>
+      <source>${basedir}/src/main/assembly/LICENSE-with-deps.txt</source>
+      <destName>LICENSE.txt</destName>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/src/main/assembly/NOTICE-with-deps.txt</source>
+      <destName>NOTICE.txt</destName>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/src/main/assembly/README.txt</source>
+      <filtered>true</filtered>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/../RELEASE-NOTES.txt</source>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+  </files>
+
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>true</useProjectArtifact>
+      <outputDirectory>/lib</outputDirectory>
+      <includes>
+        <include>${project.groupId}:${project.artifactId}</include>
+        <include>de.l3s.boilerpipe:boilerpipe</include>
+      </includes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
diff --git a/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java b/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
new file mode 100644
index 0000000..94a3210
--- /dev/null
+++ b/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.htmlscraper;
+
+import de.l3s.boilerpipe.BoilerpipeExtractor;
+import de.l3s.boilerpipe.BoilerpipeProcessingException;
+import de.l3s.boilerpipe.extractors.ArticleExtractor;
+import de.l3s.boilerpipe.extractors.CanolaExtractor;
+import de.l3s.boilerpipe.extractors.DefaultExtractor;
+import de.l3s.boilerpipe.extractors.LargestContentExtractor;
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.vocab.SINDICE;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Implementation of content extractor for performing <i>HTML</i> scraping.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class HTMLScraperExtractor implements Extractor.ContentExtractor {
+
+    public static final IRI PAGE_CONTENT_DE_PROPERTY  =
+            SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/de");
+    public static final IRI PAGE_CONTENT_AE_PROPERTY  =
+            SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ae");
+    public static final IRI PAGE_CONTENT_LCE_PROPERTY =
+            SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/lce");
+    public static final IRI PAGE_CONTENT_CE_PROPERTY  =
+            SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ce");
+
+    private final List<ExtractionRule> extractionRules = new ArrayList<>();
+
+    public HTMLScraperExtractor() {
+        loadDefaultRules();
+    }
+
+    public void addTextExtractor(String name, IRI property, BoilerpipeExtractor extractor) {
+        extractionRules.add( new ExtractionRule(name, property, extractor) );
+    }
+
+    public String[] getTextExtractors() {
+        final List<String> extractors = new ArrayList<>();
+        for(ExtractionRule er : extractionRules) {
+            extractors.add(er.name);
+        }
+        return extractors.toArray( new String[extractors.size()] );
+    }
+
+    @Override
+    public void run(
+            ExtractionParameters extractionParameters,
+            ExtractionContext extractionContext,
+            InputStream inputStream,
+            ExtractionResult extractionResult
+    ) throws IOException, ExtractionException {
+        try {
+            final IRI documentIRI = extractionContext.getDocumentIRI();
+            for (ExtractionRule extractionRule : extractionRules) {
+                final String content = extractionRule.boilerpipeExtractor.getText(new InputStreamReader(inputStream));
+                extractionResult.writeTriple(
+                        documentIRI,
+                        extractionRule.property,
+                        SimpleValueFactory.getInstance().createLiteral(content)
+                );
+            }
+        } catch (BoilerpipeProcessingException bpe) {
+            throw new ExtractionException("Error while applying text processor " + ArticleExtractor.class, bpe);
+        }
+    }
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HTMLScraperExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    public void setStopAtFirstError(boolean b) {
+        // Ignored.
+    }
+
+    private void loadDefaultRules() {
+        addTextExtractor("default-extractor"      , PAGE_CONTENT_DE_PROPERTY , DefaultExtractor.getInstance());
+        addTextExtractor("article-extractor"      , PAGE_CONTENT_AE_PROPERTY , ArticleExtractor.getInstance());
+        addTextExtractor("large-content-extractor", PAGE_CONTENT_LCE_PROPERTY, LargestContentExtractor.getInstance());
+        addTextExtractor("canola-extractor"       , PAGE_CONTENT_CE_PROPERTY , CanolaExtractor.getInstance());
+    }
+
+    /**
+     * This class associates a <i>BoilerPipe</i> extractor with the property going to host the extracted content.
+     */
+    class ExtractionRule {
+
+        public final String name;
+        public final IRI property;
+        public final BoilerpipeExtractor boilerpipeExtractor;
+
+        ExtractionRule(String name, IRI property, BoilerpipeExtractor boilerpipeExtractor) {
+            if(name == null) {
+                throw new NullPointerException("name cannot be null.");
+            }
+            if(property == null) {
+                throw new NullPointerException("property cannot be null.");
+            }
+            if(boilerpipeExtractor == null) {
+                throw new NullPointerException("extractor cannot be null.");
+            }
+            this.name = name;
+            this.property = property;
+            this.boilerpipeExtractor = boilerpipeExtractor;
+        }
+
+    }
+}
diff --git a/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java b/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java
new file mode 100644
index 0000000..25a9992
--- /dev/null
+++ b/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java
@@ -0,0 +1,42 @@
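+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */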
+package org.apache.any23.plugin.htmlscraper;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * Factory producing {@link HTMLScraperExtractor} instances. It is registered under the name
+ * {@link #NAME} for the <code>text/html</code> and <code>application/xhtml+xml</code> content
+ * types, and a shared instance of it serves as the extractor description.
+ *
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+public class HTMLScraperExtractorFactory extends SimpleExtractorFactory<HTMLScraperExtractor> implements
+        ExtractorFactory<HTMLScraperExtractor> {
+
+    /** Name under which the extractor is registered. */
+    public static final String NAME = "html-scraper";
+
+    /** No prefixes are declared for this extractor. */
+    public static final Prefixes PREFIXES = null;
+
+    /** Shared description, created once when the class is initialized. */
+    private static final ExtractorDescription descriptionInstance = new HTMLScraperExtractorFactory();
+
+    /**
+     * Builds the factory with its fixed name, prefixes and accepted content types.
+     */
+    public HTMLScraperExtractorFactory() {
+        super(
+                NAME,
+                PREFIXES,
+                Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
+                null
+        );
+    }
+
+    /**
+     * @return a new, independent extractor instance.
+     */
+    @Override
+    public HTMLScraperExtractor createExtractor() {
+        return new HTMLScraperExtractor();
+    }
+
+    /**
+     * @return the shared description of the <i>HTML</i> scraper extractor.
+     */
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}
diff --git a/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/package-info.java b/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/package-info.java
new file mode 100644
index 0000000..441a4de
--- /dev/null
+++ b/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The {@link org.apache.any23.plugin.htmlscraper.HTMLScraperExtractor} is a special extractor
+ * to scrape textual content from generic <i>HTML</i> pages.
+ */
+package org.apache.any23.plugin.htmlscraper;
diff --git a/html-scraper/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/html-scraper/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
new file mode 100644
index 0000000..30e75c2
--- /dev/null
+++ b/html-scraper/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -0,0 +1 @@
+org.apache.any23.plugin.htmlscraper.HTMLScraperExtractorFactory
\ No newline at end of file
diff --git a/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java b/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
new file mode 100644
index 0000000..4d7b0f3
--- /dev/null
+++ b/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.htmlscraper;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Test case for {@link HTMLScraperExtractor}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class HTMLScraperExtractorTest {
+
+    /** Name of the sample page, resolved relative to this class' package. */
+    private static final String TEST_PAGE = "html-scraper-extractor-test.html";
+
+    private HTMLScraperExtractor extractor;
+
+    @Before
+    public void setUp() {
+        extractor = new HTMLScraperExtractorFactory().createExtractor();
+    }
+
+    @After
+    public void tearDown() {
+        extractor = null;
+    }
+
+    /**
+     * The default configuration must expose four distinct text extractors.
+     */
+    @Test
+    public void testGetExtractors() {
+        final String[] extractors = extractor.getTextExtractors();
+        // JUnit expects (expected, actual); the reverse order yields misleading failure messages.
+        Assert.assertEquals( 4, new HashSet<>(Arrays.asList(extractors)).size() );
+    }
+
+    /**
+     * Running the extractor on the sample page must emit one triple for each default property.
+     */
+    @Test
+    public void testRun() throws IOException, ExtractionException {
+        final ExtractionResult extractionResult = mock(ExtractionResult.class);
+        final IRI pageIRI = SimpleValueFactory.getInstance().createIRI("http://fake/test/page/testrun");
+        final ExtractionContext extractionContext = new ExtractionContext(
+                extractor.getDescription().getExtractorName(),
+                pageIRI
+        );
+        // The stream is closed even if the extraction fails.
+        try (InputStream is = this.getClass().getResourceAsStream(TEST_PAGE)) {
+            // Fail with a clear message instead of handing a null stream to the extractor.
+            Assert.assertNotNull("Test resource not found: " + TEST_PAGE, is);
+            extractor.run(ExtractionParameters.newDefault(), extractionContext, is, extractionResult);
+        }
+
+        verify(extractionResult).writeTriple(
+                eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_DE_PROPERTY), any());
+        verify(extractionResult).writeTriple(
+                eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_AE_PROPERTY), any());
+        verify(extractionResult).writeTriple(
+                eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_LCE_PROPERTY), any());
+        verify(extractionResult).writeTriple(
+                eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_CE_PROPERTY), any());
+    }
+
+}
diff --git a/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html b/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
new file mode 100644
index 0000000..6ec92fb
--- /dev/null
+++ b/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
@@ -0,0 +1,493 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml"
+      xmlns:svg="http://www.w3.org/2000/svg">
+<head>
+    <title>Sindice - The semantic web index</title>
+    <meta http-equiv="Content-Type" content="xhtml/xml; charset=UTF-8"/>
+    <link type="text/css" rel="stylesheet" href="/stylesheets/site.css"/>
+
+    <link rel="search"
+          type="application/opensearchdescription+xml"
+          href="http://sindice.com/opensearch-term.xml"
+          title="Sindice Term Search"/>
+    <link rel="search"
+          type="application/opensearchdescription+xml"
+          href="http://sindice.com/opensearch-advanced.xml"
+          title="Sindice Triple Search"/>
+    <link type="image/gif" rel="icon" href="/favicon.gif"/>
+    <link type="image/vnd.microsoft.icon" rel="shortcut icon" href="/favicon.ico"/>
+
+    <link href="/javascripts/jquery/css/start/jquery-ui-1.8.1.custom.css?1291645617" media="screen" rel="stylesheet"
+          type="text/css"/>
+    <link href="/javascripts/jquery/css/blue/style.css?1291645617" media="screen" rel="stylesheet" type="text/css"/>
+    <link href="/javascripts/jsTree/source/tree_component.css?1291645617" media="screen" rel="stylesheet"
+          type="text/css"/>
+    <link href="/javascripts/jquery/js/jquery.jcarousel.css?1291645617" media="screen" rel="stylesheet"
+          type="text/css"/>
+    <link href="/javascripts/jquery/css/jcarousel/tango/skin.css?1291645617" media="screen" rel="stylesheet"
+          type="text/css"/>
+
+
+    <script type="text/javascript" src="/javascripts/jquery/js/jquery-1.4.2.min.js"></script>
+    <script type="text/javascript" src="/javascripts/jquery/js/jquery-ui-1.8.1.custom.min.js"></script>
+    <script src="/javascripts/jquery/js/jquery.jcarousel.min.js?1291645617" type="text/javascript"></script>
+    <script src="/javascripts/replacePrefix.js?1291645618" type="text/javascript"></script>
+    <script src="/javascripts/jquery/js/jquery.tablesorter.min.js?1291645617" type="text/javascript"></script>
+    <script src="/javascripts/jsTree/_lib/css.js?1291645617" type="text/javascript"></script>
+    <script src="/javascripts/jsTree/source/tree_component.js?1291645617" type="text/javascript"></script>
+    <script src="/javascripts/idleDetector.js?1291645618" type="text/javascript"></script>
+    <script src="/javascripts/mainPage.js?1291645618" type="text/javascript"></script>
+</head>
+<body>
+<div class="hidden"><a href="#main">Skip to contents</a></div>
+<div id="header" style="position:relative;" class="container fixedWidth">
+    <div class="columnL span-3">
+        <div id="logo"><a href="/"><img src="/images/logo.png" height="80" width="240" alt="Sindice" title=""/></a>
+        </div>
+        <!-- /column -->
+    </div>
+    <div class="columnR span-8a last">
+        <ul id="nav" class="_">
+            <li class="_"><a href="/">Home</a></li>
+            <li class="_"><a href="/main/about">About</a></li>
+            <li class="_"><a href="/search">Search</a></li>
+            <li class="_"><a href="/main/submit">Submit</a></li>
+            <li class="_"><a href="/main/forum">Forum</a></li>
+            <li class="_"><a href="/developers/welcome">Dev</a></li>
+        </ul>
+        <!-- /column -->
+    </div>
+    <div style="float: right; font-size: 11px; color: rgb(166, 201, 226); position:absolute; bottom:0; right:4px;"
+         id="frontendVersion">
+        web01
+        <a href="/main/changes#version-2.0.1">Version: 2.0.1</a>
+    </div>
+    <!-- /header -->
+</div>
+
+<div id="main" class="container fixedWidth">
+<div id="intro" class="clear">
+
+
+    <div class="column span-6">
+        <h1>Sindice - Data Web Services</h1>
+
+        <div id="about">
+
+            <p>
+                Billion pieces of reusable information can already be found
+                across hundreds of millions web pages which embed RDF and Microformats. Start
+                consuming this data today with Sindice Data Web services.
+            </p>
+
+            <a href="/main/about"><img src="/images/learn-more-white.png" alt="Learn more"/></a>
+
+            <!-- /padding -->
+        </div>
+
+        <!-- /column -->
+    </div>
+
+
+    <div class="column span-6 last">
+        <div id="search-w">
+            <ul class="clear">
+                <li title="search" class="current frontendTab">
+                    Search
+                </li>
+                <li title="submit" class="frontendTab">
+                    Submit
+                </li>
+                <li title="search-inspect" class="frontendTab">
+                    Inspector Tool
+                </li>
+            </ul>
+
+
+            <div id="search" class="frontendTabContent">
+                <h2>Search: </h2>
+
+                <div style="float:left;border:none;">
+                    <input class="search-types-radio" type="radio" name="search-types" id="term-type" value="term"
+                           checked="checked"/><label class="search-type-label" for="term-type">term</label>
+                    <input class="search-types-radio" type="radio" name="search-types" id="property-type"
+                           value="property"/><label class="search-type-label" for="property-type">property</label>
+                    <input class="search-types-radio" type="radio" name="search-types" id="advanced-type"
+                           value="advanced"/><label class="search-type-label" for="advanced-type">advanced</label>
+                    <input class="search-types-radio" type="radio" name="search-types" id="sigma-type"
+                           value="sigma"/><label class="search-type-label" for="sigma-type"><img
+                        style="vertical-align:sub; width:46px;margin-top:6px;" src="/images/sig.ma-about.png"/></label>
+                </div>
+                <div class="search-types" id="search-type-term">
+                    <form action="/search" method="get">
+                        <input onfocus="if (this.value=='Type one or more keywords or IRI') {this.value=''} else {this.select()}; return true;"
+                               type="text" name="q" size="45" value="Type one or more keywords or IRI"/>
+                        <button type="submit" class="inspectButton">
+                            SEARCH
+                        </button>
+                    </form>
+		        <span class="tip">Examples:
+		          <a href="/search?q=tim%20berners%20lee">tim berners lee</a>
+		          (by <a href="/search?q=http%3A%2F%2Fwww.w3.org%2FPeople%2FBerners-Lee%2Fcard">IRI</a>),
+		          <a href="/search?q=michele">michele</a>, <a href="/search?q=deri">deri</a></span>
+
+                    <div style="height:1em;">
+                        <div class="search-numbers-small documentNumberTotal" style="display:none;">
+                            <span>Searching on about </span><span class="totalNo"></span><span> documents.</span>
+                        </div>
+                    </div>
+                </div>
+
+                <div class="search-types" style="display:none;" id="search-type-property">
+                    <form method="get" action="/search">
+                        <input type="text" name="predicate" id="predicate" value="foaf:mbox" size="20"/>
+                        <input type="text" name="value" id="value" value="mailto:timbl@w3.org" size="20"
+                               style="margin-right: 7px;"/>
+                        <button id="propertySearch" class="inspectButton">
+                            SEARCH
+                        </button>
+                    </form>
+                    <script type="text/javascript">
+                        jQuery(document).ready(function() {
+                            jQuery("#propertySearch").click(function() {
+                                var predicate = "<" + jQuery("#predicate").val() + ">";
+
+                                var value = jQuery("#value").val();
+                                if (value.indexOf("http") === 0) {
+                                    value = "<" + value + ">";
+                                } else {
+                                    value = '"' + value + '"';
+                                }
+
+                                var nq = "* " + predicate + " " + value;
+                                document.location.href = "/search?nq=" + encodeURIComponent(nq);
+                                return false;
+                            });
+                        });
+                    </script>
+			        <span class="tip">Examples:
+			          <a href="/search?nq=*+<foaf%3Aknows>+<http%3A%2F%2Frichard.cyganiak.de%2Ffoaf.rdf>">Richard's
+                          contacts</a>,
+			          <a href="/search?nq=*+<foaf%3AworkplaceHomepage>+<http%3A%2F%2Fwww.deri.ie>">deri people</a>,
+			        </span>
+
+                    <div style="height:1em;">
+                        <div class="search-numbers-small documentNumberTotal" style="display:none;">
+                            <span>Searching on about </span><span class="totalNo"></span><span> documents.</span>
+                        </div>
+                    </div>
+                </div>
+
+
+                <div class="search-types" style="display:none;" id="search-type-advanced">
+                    <form method="get" action="/search">
+                        <input type="text" value="Type triple query" size="45" name="nq"
+                               onfocus="if (this.value=='Type triple query') {this.value=''} else {this.select()}; return true;"/>
+
+                        <button type="submit" class="inspectButton">
+                            SEARCH
+                        </button>
+                    </form>
+				        <span class="tip">
+				        <a href="developers/api#Querylanguages-AdvancedSearch">Query Language Documentation</a>,
+				        Example:
+				          <a href="/search?nq=*+%3Cvcard%3Atitle%3E+%27research%27">self confessed researchers</a>
+				        </span>
+
+                    <div style="height:1em;">
+                        <div class="search-numbers-small documentNumberTotal" style="display:none;">
+                            <span>Searching on about </span><span class="totalNo"></span><span> documents.</span>
+                        </div>
+                    </div>
+                </div>
+                <div class="search-types" style="display:none;" id="search-type-sigma">
+
+                    <input id="sigma-search-input" type="text" value="Type keyword query" size="45" name="q"
+                           onfocus="if (this.value=='Type keyword query') {this.value=''} else {this.select()}; return true;"/>
+
+                    <button type="submit" class="inspectButton" id="sigma-search-button">
+                        SEARCH
+                    </button>
+                    <br/>
+					        <span class="tip">Example:
+					          <a href="http://sig.ma/search?q=Michael%20Jackson">Michael Jackson</a>,
+					          <a href="http://sig.ma/search?q=Tim%20Berners%20Lee">Tim Berners Lee</a>,
+					          <a href="http://sig.ma/search?q=Barack%20Obama">Barack Obama</a>
+					        </span>
+
+                    <div style="height:1em;">
+                        <div class="search-numbers-small">
+                            Sig.ma - Live views on the Web of Data.
+                        </div>
+                    </div>
+
+                </div>
+            </div>
+
+
+            <div id="search-inspect" style="display:none;" class="frontendTabContent">
+                <h2 style="margin-bottom:2px;">Inspect a web page for structured data content</h2>
+                <input id="inspect-search-input" type="text" value="http://www.deri.ie/about/team/" size="45" name="q"
+                       onfocus="if (this.value=='http://www.deri.ie/about/team/') {this.value=''} else {this.select()}; return true;"/>
+                <button class="inspectButton" type="submit" id="inspect-search-button">
+                    INSPECT
+                </button>
+                <br/>
+        <span class="tip">
+        <a href="http://inspector.sindice.com">The Web Data Inspector</a>
+        </span>
+
+                <div style="height:1em;">
+                    <div class="search-numbers-small">
+                        shows how Sindice sees your structured data markup.
+                    </div>
+                </div>
+            </div>
+
+            <div id="submit" style="display:none;" class="frontendTabContent">
+                <h2 style="margin-bottom:2px;">Submit a URL of a page with microformats, RDFa markup or RDF files</h2>
+
+                <form method="post" action="/api/v2/ping">
+                    <input size="45" type="text" name="url" id="url" value="http://www.deri.ie/about/team/"
+                           onfocus="if (this.value=='http://www.deri.ie/about/team/') {this.value=''} else {this.select()}; return true;"/>
+                    <button type="submit" class="inspectButton">
+                        SUBMIT
+                    </button>
+                </form>
+        <span class="tip">
+        Use the
+        <a href="/main/submit">full submit form</a>
+        </span>
+
+                <div style="height:1em;">
+                    <div class="search-numbers-small">
+                        to submit multiple pages or semantic sitemaps.
+                    </div>
+                </div>
+            </div>
+
+        </div>
+
+        <!-- /column -->
+    </div>
+
+    <!-- /intro -->
+</div>
+
+<div id="content" class="clear">
+
+
+    <div class="column span-6 equalColumn">
+
+
+        <div class="smallbox pubsub">
+            <h2>
+                <span>Latest data</span>
+                <a class="latestData" style="display:none;" href="/search?q=date%3Atoday&qt=term">
+                    <span class="last24"></span><span> documents processed today</span>
+                </a>
+            </h2>
+
+            <div style="clear:both;"></div>
+            <div id="jcarousel" class="jcarousel-skin-tango">
+                <ul></ul>
+            </div>
+            <script type="text/javascript">
+                var LATEST_DATA_WIDGET_SERVLET_URL = "http://api.sindice.com/latest-data-widget/latest-data-widget";
+            </script>
+            <script src="/javascripts/latest-data-widget.js?1297082337" type="text/javascript"></script>
+        </div>
+
+
+        <div id="latest-news" class="smallbox">
+            <h2>Highlights</h2>
+
+            <div id="gallery">
+                <a href="http://blog.sindice.com/2010/02/19/any23-v0-2-released/" class="show">
+                    <img src="/images/slideshow/any23-logo.png" alt="Anything to triples" alt=""
+                         title="" class="galeryImage"
+                         rel="<h3>any23</h3>Anything to Triples. New version - <b>Now available</b>"/>
+                </a>
+
+                <a href="http://siren.sindice.com">
+                    <img src="/images/slideshow/x-siren.png" alt="SIREn" alt=""
+                         title="" class="galeryImage"
+                         rel="<h3>SIREn</h3><b>Open Source!</b> Efficient Information Retrieval for Lucene"/>
+                </a>
+
+                <a href="http://sig.ma">
+                    <img src="/images/slideshow/x-sigma.png" alt="Sig.ma" alt=""
+                         title="" class="galeryImage"
+                         rel="<h3>Sig.ma</h3>See how Sindice is powering <b>Sigma</b>, live views on the Web of Data"/>
+                </a>
+
+                <a href="http://inspector.sindice.com/inspect">
+                    <img src="/images/slideshow/x-inspector.png" alt="Web data inspector" alt=""
+                         title="" class="galeryImage"
+                         rel="<h3>Web Data Inspector</h3>tool + Full Sindice Cache (with reasoning!) - <b>Now available</b>"/>
+                </a>
+
+                <div class="caption">
+                    <div class="content"></div>
+                </div>
+            </div>
+        </div>
+        <script src="/javascripts/gallery.js?1291645618" type="text/javascript"></script>
+
+
+        <div id="dev" class="smallbox">
+
+            <h2>Developers: start consuming the Web of Data</h2>
+
+            <div>
+                <img style="float:right" src="/images/icon-rdf-dev.png" alt="RDF Developer" width="106" height="60"/>
+
+                <p>
+                    Add value and cool features to your application by using our restful
+                    APIs and JS widgets.
+                    Let us help you finding the right data and the way to access it!
+                </p>
+
+                <p>
+                    How do I build my application? <a href="/developers/api">API documentation</a>
+                </p>
+                <img style="float:right" src="/images/mf-lg-ora.png" alt="Microformat" width="106" height="50"/>
+                <!--
+            <p>
+            What Data, which sources? <a href="/map">Map of the Web of Data</a>
+            </p>
+            -->
+                <p>
+                    Questions? Ideas? <a href="http://groups.google.com/group/sindice-dev">Sindice-dev</a> group.
+                </p>
+                <a href="/developers/welcome">
+                    <img src="/images/learn-more-green.png" alt="Learn more"/>
+                </a>
+            </div>
+
+            <!-- /dev -->
+        </div>
+
+
+        <!-- /column -->
+    </div>
+
+    <div class="column span-6 last equalColumn">
+
+
+        <div id="blog" class="box">
+            <h2 class="clear"><span><a href="http://blog.sindice.com">Sindice Blog</a></span>
+        <span class="feed">
+          <a href="http://blog.sindice.com/feed/atom/">Feed</a>
+        </span>
+            </h2>
+
+            <div class="blogEntriesBox">
+
+                <!-- v0.715358440328743 -->
+                <ul>
+                    <li>
+                        <div class="clear title">
+                            <h3><a href="http://blog.sindice.com/?p=273">Sindice migration</a></h3>
+                            <span class="date">Nov 26, 2010</span>
+                        </div>
+                        <p>This is mainly a test post to verify that the Sindice blog continues to work after migrating
+                            it to a new server. (<a href="http://blog.sindice.com/?p=273">More&nbsp;&rarr;</a>)</p>
+                    </li>
+                    <li>
+                        <div class="clear title">
+                            <h3><a href="http://blog.sindice.com/?p=264">Sindice now supports Efficient Data discovery
+                                and Sync</a></h3>
+                            <span class="date">Jul 09, 2010</span>
+                        </div>
+                        <p>So far semantic web search engines and semantic aggregation services have been inserting
+                            datasets by hand or have been based on &#8220;ran... (<a
+                                    href="http://blog.sindice.com/?p=264">More&nbsp;&rarr;</a>)</p>
+                    </li>
+                    <li>
+                        <div class="clear title">
+                            <h3><a href="http://blog.sindice.com/?p=258">Sindice planned downtime this weekend</a></h3>
+                            <span class="date">Jun 09, 2010</span>
+                        </div>
+                        <p>HiDue to an expansion of one of our datacentres (and the electrical work that this implies),
+                            Sindice and related services such as sig.ma w... (<a href="http://blog.sindice.com/?p=258">More&nbsp;&rarr;</a>)
+                        </p>
+                    </li>
+                    <li>
+                        <div class="clear title">
+                            <h3><a href="http://blog.sindice.com/?p=240">Any23 v0.4.0 Released</a></h3>
+                            <span class="date">May 27, 2010</span>
+                        </div>
+                        <p>Dear All, the Sindice FBK team is proud to announce the Any23 0.4.0 release. (<a
+                                href="http://blog.sindice.com/?p=240">More&nbsp;&rarr;</a>)</p>
+                    </li>
+                    <li>
+                        <div class="clear title">
+                            <h3><a href="http://blog.sindice.com/?p=225">Any23 v0.3.0 Released</a></h3>
+                            <span class="date">Apr 23, 2010</span>
+                        </div>
+                        <p>Dear All, we&#8217;re pleased to announce the Any23 0.3.0 releasePlease keep in mind this is
+                            a beta, so everybody using Any23 in a develop... (<a href="http://blog.sindice.com/?p=225">More&nbsp;&rarr;</a>)
+                        </p>
+                    </li>
+
+                </ul>
+
+
+                <div class="archives">
+                    <a href="http://blog.sindice.com/">More blog posts &rarr;</a>
+                </div>
+
+            </div>
+        </div>
+
+        <!-- /column -->
+    </div>
+</div>
+
+</div>
+
+<div id="footer" class="container clear fixedWidth">
+    <div class="span-6" style="float:left;">
+        <p>
+            <a href="/">Home</a> |
+            <a href="/main/about">About</a> |
+            <a href="/search">Search</a> |
+            <a href="/main/submit">Submit</a> |
+            <a href="/main/forum">Forum</a> |
+            <a href="/developers/welcome">Dev</a>
+        </p>
+        <!-- /column -->
+    </div>
+
+    <div style="text-align: right;float:right !important;" class="span-6 last">
+        <p>
+            <a href="http://blog.sindice.com">Blog</a> |
+            <a href="/developers/feedback">Contact</a> |
+            Copyright &copy; 2008-2011 <a href="http://www.deri.ie">DERI</a>
+        </p>
+        <!-- /column -->
+    </div>
+    <!-- /footer -->
+</div>
+<div style="clear:both;"></div>
+</body>
+</html>
\ No newline at end of file
diff --git a/integration-test/pom.xml b/integration-test/pom.xml
new file mode 100644
index 0000000..fe8d385
--- /dev/null
+++ b/integration-test/pom.xml
@@ -0,0 +1,174 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.4-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.any23.plugins</groupId>
+  <artifactId>apache-any23-integration-test</artifactId>
+
+  <name>Apache Any23 :: Plugins :: Integration Test</name>
+  <description>Any23 plugin integration test module.</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-cli</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23.plugins</groupId>
+      <artifactId>apache-any23-html-scraper</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23.plugins</groupId>
+      <artifactId>apache-any23-office-scraper</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23.plugins</groupId>
+      <artifactId>apache-any23-basic-crawler</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23.plugins</groupId>
+      <artifactId>apache-any23-openie</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+
+    <!-- BEGIN: Test Dependencies -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- END: Test Dependencies -->
+
+  </dependencies>
+
+  <build>
+
+    <plugins>
+      <!-- skip assembly generation for ITs -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <skipAssembly>true</skipAssembly>
+        </configuration>
+      </plugin>
+
+      <!--
+        Detects every plugin module in the parent directory (any */pom.xml except this module);
+        for each one, runs a clean build and copies its dependencies (dependency:copy-dependencies).
+        This step is required by the integration tests that run afterwards.
+      -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-invoker-plugin</artifactId>
+        <configuration>
+          <projectsDirectory>..</projectsDirectory>
+          <cloneProjectsTo>${project.build.directory}/plugins-build</cloneProjectsTo>
+          <pomIncludes>
+            <pomInclude>*/pom.xml</pomInclude>
+          </pomIncludes>
+          <pomExcludes>
+            <pomExclude>integration-test/*</pomExclude>
+          </pomExcludes>
+          <properties>
+            <assembly.skip>true</assembly.skip>
+          </properties>
+          <goals>
+            <goal>clean</goal>
+            <goal>dependency:copy-dependencies</goal>
+            <goal>integration-test</goal>
+          </goals>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <!--
+        Runs the integration tests declared in this module.
+        Integration test classes are those whose file name ends with IT.java.
+      -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>verify</phase>
+            <goals>
+              <goal>test</goal>
+            </goals>
+            <configuration>
+              <skip>false</skip>
+              <excludes>
+                <exclude>none</exclude>
+              </excludes>
+              <includes>
+                <include>**/*IT.java</include>
+              </includes>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+  </profiles>
+
+</project>
diff --git a/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java b/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
new file mode 100644
index 0000000..351280a
--- /dev/null
+++ b/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin;
+
+import org.apache.any23.cli.Crawler;
+import org.apache.any23.cli.Tool;
+import org.apache.any23.extractor.ExtractorGroup;
+import org.apache.any23.extractor.ExtractorRegistryImpl;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Integration test for plugin detection through {@link Any23PluginManager}: covers both
+ * extractor plugins and CLI tool plugins located under {@code target/plugins-build/}.
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class PluginIT {
+
+    private static final int NUM_OF_EXTRACTORS_INCL_OPENIE = 37; // expected count when the OpenIE extractor class is loadable
+    
+    private static final int NUM_OF_EXTRACTORS_EXCL_OPENIE = 36; // expected count when the OpenIE extractor class is not found
+
+    private static final String PLUGIN_DIR = "target/plugins-build/"; // relative root of the built plugin modules
+
+    private static final File HTML_SCRAPER_TARGET_DIR       = new File(PLUGIN_DIR + "html-scraper/target/classes");
+    private static final File HTML_SCRAPER_DEPENDENCY_DIR   = new File(PLUGIN_DIR + "html-scraper/target/dependency");
+
+    private static final File OFFICE_SCRAPER_TARGET_DIR     = new File(PLUGIN_DIR + "office-scraper/target/classes");
+    private static final File OFFICE_SCRAPER_DEPENDENCY_DIR = new File(PLUGIN_DIR + "office-scraper/target/dependency");
+
+    private static final File CRAWLER_TARGET_DIR     = new File(PLUGIN_DIR + "basic-crawler/target/classes");
+    private static final File CRAWLER_DEPENDENCY_DIR = new File(PLUGIN_DIR + "basic-crawler/target/dependency");
+
+    private static final File OPENIE_TARGET_DIR     = new File(PLUGIN_DIR + "openie/target/classes");
+    private static final File OPENIE_DEPENDENCY_DIR = new File(PLUGIN_DIR + "openie/target/dependency");
+
+    private Any23PluginManager manager;
+
+    @Before
+    public void before() {
+        manager = Any23PluginManager.getInstance();
+    }
+
+    @After
+    public void after() {
+        manager = null;
+    }
+
+    /**
+     * {@link org.apache.any23.extractor.Extractor} plugin detection: the extractor group returned for the html-scraper,
+     * office-scraper and openie directories must hold 37 extractors if the OpenIE extractor class is loadable, else 36.
+     * @throws IOException presumably raised by the plugin manager while reading the plugin directories (confirm)
+     * @throws InstantiationException presumably raised when a detected extractor cannot be instantiated (confirm)
+     * @throws IllegalAccessException presumably raised when a detected extractor is not accessible (confirm)
+     */
+    @Test
+    public void testDetectExtractorPlugins() throws IOException, InstantiationException, IllegalAccessException {
+        final ExtractorGroup extractorGroup = manager.getApplicableExtractors(
+                new ExtractorRegistryImpl(),
+                HTML_SCRAPER_TARGET_DIR,
+                HTML_SCRAPER_DEPENDENCY_DIR,
+                OFFICE_SCRAPER_TARGET_DIR,
+                OFFICE_SCRAPER_DEPENDENCY_DIR,
+                OPENIE_TARGET_DIR,
+                OPENIE_DEPENDENCY_DIR
+        );
+        try { // probe, without initializing it, whether the OpenIE extractor class is visible to this test's class loader
+          Class.forName("org.apache.any23.plugin.extractor.openie.OpenIEExtractor", false, this.getClass().getClassLoader());
+          assertEquals("Did not find the number of expected extractors", NUM_OF_EXTRACTORS_INCL_OPENIE ,
+                  extractorGroup.getNumOfExtractors()
+          );
+        } catch (ClassNotFoundException e) {
+          assertEquals("Did not find the number of expected extractors", NUM_OF_EXTRACTORS_EXCL_OPENIE ,
+                  extractorGroup.getNumOfExtractors()
+          );
+        }
+    }
+
+    /**
+     * {@link Tool} plugin detection: no tool class may be reported twice, the {@link Crawler} plugin must be
+     * among the detected tools, and exactly 7 distinct tool classes are expected for the basic-crawler directories.
+     * @throws IOException presumably raised by the plugin manager while reading the plugin directories (confirm)
+     */
+    @Test
+    public void testDetectCLIPlugins() throws IOException {
+        final Iterator<Tool> tools = manager.getApplicableTools(CRAWLER_TARGET_DIR, CRAWLER_DEPENDENCY_DIR);
+        final Set<String> toolClasses = new HashSet<String>();
+        Tool tool;
+        while(tools.hasNext()) {
+            tool = tools.next();
+            assertTrue("Found duplicate tool.", toolClasses.add(tool.getClass().getName()));
+        }
+        assertTrue(
+                String.format(
+                        "Expected [%s] plugin to be detected, but it is not found in the built classpath.",
+                        Crawler.class.getName()
+                ),
+                toolClasses.contains(Crawler.class.getName())
+        );
+        assertEquals(7, toolClasses.size()); // core CLIs; NOTE(review): this total also counts the Crawler plugin asserted above
+    }
+
+}
diff --git a/integration-test/src/test/resources/log4j.properties b/integration-test/src/test/resources/log4j.properties
new file mode 100644
index 0000000..d715976
--- /dev/null
+++ b/integration-test/src/test/resources/log4j.properties
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootCategory=INFO, stdout
+
+log4j.appender.stdout.Threshold=INFO
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+log4j.appender.stdout.layout.ConversionPattern=%5p [%t] %m%n
diff --git a/office-scraper/.classpath b/office-scraper/.classpath
new file mode 100755
index 0000000..a54db88
--- /dev/null
+++ b/office-scraper/.classpath
@@ -0,0 +1,199 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+  <classpathentry kind="src" path="src/test/java" output="target/test-classes" including="**/*.java"/>
+  <classpathentry kind="src" path="src/test/resources" output="target/test-classes" excluding="**/*.java"/>
+  <classpathentry kind="src" path="src/main/java" including="**/*.java"/>
+  <classpathentry kind="src" path="src/main/resources" excluding="**/*.java"/>
+  <classpathentry kind="src" path="target/maven-shared-archive-resources" excluding="**/*.java"/>
+  <classpathentry kind="output" path="target/classes"/>
+  <classpathentry kind="var" path="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0.jar" sourcepath="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2.jar" sourcepath="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/measure/unit-api/1.0/unit-api-1.0.jar" sourcepath="M2_REPO/javax/measure/unit-api/1.0/unit-api-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/inject/javax.inject/1/javax.inject-1.jar" sourcepath="M2_REPO/javax/inject/javax.inject/1/javax.inject-1-sources.jar"/>
+  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-core"/>
+  <classpathentry kind="src" path="/apache-any23-api"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-csvutils"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6.jar" sourcepath="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-mime"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6.jar" sourcepath="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2.jar" sourcepath="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1.jar" sourcepath="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16.jar" sourcepath="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1.jar" sourcepath="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0.jar" sourcepath="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar" sourcepath="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar" sourcepath="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/gagravarr/vorbis-java-tika/0.8/vorbis-java-tika-0.8.jar" sourcepath="M2_REPO/org/gagravarr/vorbis-java-tika/0.8/vorbis-java-tika-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/tallison/jmatio/1.5/jmatio-1.5.jar" sourcepath="M2_REPO/org/tallison/jmatio/1.5/jmatio-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18.jar" sourcepath="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/tukaani/xz/1.8/xz-1.8.jar" sourcepath="M2_REPO/org/tukaani/xz/1.8/xz-1.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11.jar" sourcepath="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2.jar" sourcepath="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/pdfbox/2.0.16/pdfbox-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/pdfbox/2.0.16/pdfbox-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/fontbox/2.0.16/fontbox-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/fontbox/2.0.16/fontbox-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/pdfbox-tools/2.0.16/pdfbox-tools-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/pdfbox-tools/2.0.16/pdfbox-tools-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jempbox/1.8.16/jempbox-1.8.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/jempbox/1.8.16/jempbox-1.8.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcmail-jdk15on/1.62/bcmail-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcmail-jdk15on/1.62/bcmail-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcprov-jdk15on/1.62/bcprov-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcprov-jdk15on/1.62/bcprov-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcpkix-jdk15on/1.62/bcpkix-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcpkix-jdk15on/1.62/bcpkix-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar" sourcepath="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar" sourcepath="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06.jar" sourcepath="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/healthmarketscience/jackcess/jackcess/3.0.1/jackcess-3.0.1.jar" sourcepath="M2_REPO/com/healthmarketscience/jackcess/jackcess/3.0.1/jackcess-3.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/healthmarketscience/jackcess/jackcess-encrypt/3.0.0/jackcess-encrypt-3.0.0.jar" sourcepath="M2_REPO/com/healthmarketscience/jackcess/jackcess-encrypt/3.0.0/jackcess-encrypt-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1.jar" sourcepath="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ow2/asm/asm/7.2-beta/asm-7.2-beta.jar" sourcepath="M2_REPO/org/ow2/asm/asm/7.2-beta/asm-7.2-beta-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/mp4parser/isoparser/1.1.22/isoparser-1.1.22.jar" sourcepath="M2_REPO/com/googlecode/mp4parser/isoparser/1.1.22/isoparser-1.1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/drewnoakes/metadata-extractor/2.11.0/metadata-extractor-2.11.0.jar" sourcepath="M2_REPO/com/drewnoakes/metadata-extractor/2.11.0/metadata-extractor-2.11.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/adobe/xmp/xmpcore/5.1.3/xmpcore-5.1.3.jar" sourcepath="M2_REPO/com/adobe/xmp/xmpcore/5.1.3/xmpcore-5.1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/de/l3s/boilerpipe/boilerpipe/1.1.0/boilerpipe-1.1.0.jar" sourcepath="M2_REPO/de/l3s/boilerpipe/boilerpipe/1.1.0/boilerpipe-1.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/rometools/rome/1.12.1/rome-1.12.1.jar" sourcepath="M2_REPO/com/rometools/rome/1.12.1/rome-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/rometools/rome-utils/1.12.1/rome-utils-1.12.1.jar" sourcepath="M2_REPO/com/rometools/rome-utils/1.12.1/rome-utils-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6.jar" sourcepath="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/gagravarr/vorbis-java-core/0.8/vorbis-java-core-0.8.jar" sourcepath="M2_REPO/org/gagravarr/vorbis-java-core/0.8/vorbis-java-core-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/juniversalchardet/juniversalchardet/1.0.3/juniversalchardet-1.0.3.jar" sourcepath="M2_REPO/com/googlecode/juniversalchardet/juniversalchardet/1.0.3/juniversalchardet-1.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codelibs/jhighlight/1.0.3/jhighlight-1.0.3.jar" sourcepath="M2_REPO/org/codelibs/jhighlight/1.0.3/jhighlight-1.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/pff/java-libpst/0.8.1/java-libpst-0.8.1.jar" sourcepath="M2_REPO/com/pff/java-libpst/0.8.1/java-libpst-0.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/junrar/junrar/4.0.0/junrar-4.0.0.jar" sourcepath="M2_REPO/com/github/junrar/junrar/4.0.0/junrar-4.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-rs-client/3.3.2/cxf-rt-rs-client-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-rs-client/3.3.2/cxf-rt-rs-client-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-transports-http/3.3.2/cxf-rt-transports-http-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-transports-http/3.3.2/cxf-rt-transports-http-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-core/3.3.2/cxf-core-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-core/3.3.2/cxf-core-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3.jar" sourcepath="M2_REPO/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4.jar" sourcepath="M2_REPO/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ws/xmlschema/xmlschema-core/2.2.4/xmlschema-core-2.2.4.jar" sourcepath="M2_REPO/org/apache/ws/xmlschema/xmlschema-core/2.2.4/xmlschema-core-2.2.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-xjc/2.3.2/jaxb-xjc-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-xjc/2.3.2/jaxb-xjc-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/xsom/2.3.2/xsom-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/xsom/2.3.2/xsom-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/bind/external/relaxng-datatype/2.3.2/relaxng-datatype-2.3.2.jar" sourcepath="M2_REPO/com/sun/xml/bind/external/relaxng-datatype/2.3.2/relaxng-datatype-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/codemodel/2.3.2/codemodel-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/codemodel/2.3.2/codemodel-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/bind/external/rngom/2.3.2/rngom-2.3.2.jar" sourcepath="M2_REPO/com/sun/xml/bind/external/rngom/2.3.2/rngom-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/dtd-parser/dtd-parser/1.4.1/dtd-parser-1.4.1.jar" sourcepath="M2_REPO/com/sun/xml/dtd-parser/dtd-parser/1.4.1/dtd-parser-1.4.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-tools/3.0.8/istack-commons-tools-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-tools/3.0.8/istack-commons-tools-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ant/ant/1.10.5/ant-1.10.5.jar" sourcepath="M2_REPO/org/apache/ant/ant/1.10.5/ant-1.10.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ant/ant-launcher/1.10.5/ant-launcher-1.10.5.jar" sourcepath="M2_REPO/org/apache/ant/ant-launcher/1.10.5/ant-launcher-1.10.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-frontend-jaxrs/3.3.2/cxf-rt-frontend-jaxrs-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-frontend-jaxrs/3.3.2/cxf-rt-frontend-jaxrs-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/ws/rs/jakarta.ws.rs-api/2.1.5/jakarta.ws.rs-api-2.1.5.jar" sourcepath="M2_REPO/jakarta/ws/rs/jakarta.ws.rs-api/2.1.5/jakarta.ws.rs-api-2.1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-security/3.3.2/cxf-rt-security-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-security/3.3.2/cxf-rt-security-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-exec/1.3/commons-exec-1.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-exec/1.3/commons-exec-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/opennlp/opennlp-tools/1.9.1/opennlp-tools-1.9.1.jar" sourcepath="M2_REPO/org/apache/opennlp/opennlp-tools/1.9.1/opennlp-tools-1.9.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1.jar" sourcepath="M2_REPO/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11.jar" sourcepath="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/gson/gson/2.8.5/gson-2.8.5.jar" sourcepath="M2_REPO/com/google/code/gson/gson/2.8.5/gson-2.8.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/netcdf4/4.5.5/netcdf4-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/netcdf4/4.5.5/netcdf4-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/cdm/4.5.5/cdm-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/cdm/4.5.5/cdm-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/udunits/4.5.5/udunits-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/udunits/4.5.5/udunits-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/joda-time/joda-time/2.2/joda-time-2.2.jar" sourcepath="M2_REPO/joda-time/joda-time/2.2/joda-time-2.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/jcip/jcip-annotations/1.0/jcip-annotations-1.0.jar" sourcepath="M2_REPO/net/jcip/jcip-annotations/1.0/jcip-annotations-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/httpservices/4.5.5/httpservices-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/httpservices/4.5.5/httpservices-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/quartz-scheduler/quartz/2.2.0/quartz-2.2.0.jar" sourcepath="M2_REPO/org/quartz-scheduler/quartz/2.2.0/quartz-2.2.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0.jar" sourcepath="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/ehcache/ehcache-core/2.6.2/ehcache-core-2.6.2.jar" sourcepath="M2_REPO/net/sf/ehcache/ehcache-core/2.6.2/ehcache-core-2.6.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/itadaki/bzip2/0.9.1/bzip2-0.9.1.jar" sourcepath="M2_REPO/org/itadaki/bzip2/0.9.1/bzip2-0.9.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72.jar" sourcepath="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/guava/22.0/guava-22.0.jar" sourcepath="M2_REPO/com/google/guava/guava/22.0/guava-22.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/failureaccess/1.0.1/failureaccess-1.0.1.jar" sourcepath="M2_REPO/com/google/guava/failureaccess/1.0.1/failureaccess-1.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/listenablefuture/9999.0-empty-to-avoid-conflict-with-guava/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/findbugs/jsr305/3.0.2/jsr305-3.0.2.jar" sourcepath="M2_REPO/com/google/code/findbugs/jsr305/3.0.2/jsr305-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/checkerframework/checker-qual/2.8.1/checker-qual-2.8.1.jar" sourcepath="M2_REPO/org/checkerframework/checker-qual/2.8.1/checker-qual-2.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/errorprone/error_prone_annotations/2.3.2/error_prone_annotations-2.3.2.jar" sourcepath="M2_REPO/com/google/errorprone/error_prone_annotations/2.3.2/error_prone_annotations-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/j2objc/j2objc-annotations/1.3/j2objc-annotations-1.3.jar" sourcepath="M2_REPO/com/google/j2objc/j2objc-annotations/1.3/j2objc-annotations-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/mojo/animal-sniffer-annotations/1.17/animal-sniffer-annotations-1.17.jar" sourcepath="M2_REPO/org/codehaus/mojo/animal-sniffer-annotations/1.17/animal-sniffer-annotations-1.17-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/grib/4.5.5/grib-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/grib/4.5.5/grib-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1.jar" sourcepath="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar" sourcepath="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4.jar" sourcepath="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15.jar" sourcepath="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-utility/0.8/sis-utility-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-utility/0.8/sis-utility-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/opengis/geoapi/3.0.1/geoapi-3.0.1.jar" sourcepath="M2_REPO/org/opengis/geoapi/3.0.1/geoapi-3.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/storage/sis-netcdf/0.8/sis-netcdf-0.8.jar" sourcepath="M2_REPO/org/apache/sis/storage/sis-netcdf/0.8/sis-netcdf-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/storage/sis-storage/0.8/sis-storage-0.8.jar" sourcepath="M2_REPO/org/apache/sis/storage/sis-storage/0.8/sis-storage-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-feature/0.8/sis-feature-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-feature/0.8/sis-feature-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-metadata/0.8/sis-metadata-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-metadata/0.8/sis-metadata-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-referencing/0.8/sis-referencing-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-referencing/0.8/sis-referencing-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/usc/ir/sentiment-analysis-parser/0.1/sentiment-analysis-parser-0.1.jar" sourcepath="M2_REPO/edu/usc/ir/sentiment-analysis-parser/0.1/sentiment-analysis-parser-0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2.jar" sourcepath="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0.jar" sourcepath="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-encoding"/>
+  <classpathentry kind="var" path="M2_REPO/org/rypt/f8/1.1/f8-1.1.jar" sourcepath="M2_REPO/org/rypt/f8/1.1/f8-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3.jar" sourcepath="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2.jar" sourcepath="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5.jar" sourcepath="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8.jar" sourcepath="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0.jar" sourcepath="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5.jar" sourcepath="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1.jar" sourcepath="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23.jar" sourcepath="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17.jar" sourcepath="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/junit/junit/4.12/junit-4.12.jar" sourcepath="M2_REPO/junit/junit/4.12/junit-4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar" sourcepath="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/mockito/mockito-core/3.0.0/mockito-core-3.0.0.jar" sourcepath="M2_REPO/org/mockito/mockito-core/3.0.0/mockito-core-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/bytebuddy/byte-buddy/1.9.10/byte-buddy-1.9.10.jar" sourcepath="M2_REPO/net/bytebuddy/byte-buddy/1.9.10/byte-buddy-1.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/bytebuddy/byte-buddy-agent/1.9.10/byte-buddy-agent-1.9.10.jar" sourcepath="M2_REPO/net/bytebuddy/byte-buddy-agent/1.9.10/byte-buddy-agent-1.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/objenesis/objenesis/2.6/objenesis-2.6.jar" sourcepath="M2_REPO/org/objenesis/objenesis/2.6/objenesis-2.6-sources.jar"/>
+</classpath>
\ No newline at end of file
diff --git a/office-scraper/.project b/office-scraper/.project
new file mode 100755
index 0000000..0b006bf
--- /dev/null
+++ b/office-scraper/.project
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+  <name>apache-any23-office-scraper</name>
+  <comment>Any23 plugin for scraping metadata from MS Office related file formats. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse.</comment>
+  <projects>
+    <project>apache-any23-core</project>
+    <project>apache-any23-api</project>
+    <project>apache-any23-csvutils</project>
+    <project>apache-any23-mime</project>
+    <project>apache-any23-encoding</project>
+  </projects>
+  <buildSpec>
+    <buildCommand>
+      <name>org.eclipse.jdt.core.javabuilder</name>
+    </buildCommand>
+    <buildCommand>
+      <name>org.eclipse.m2e.core.maven2Builder</name>
+    </buildCommand>
+  </buildSpec>
+  <natures>
+    <nature>org.eclipse.jdt.core.javanature</nature>
+    <nature>org.eclipse.m2e.core.maven2Nature</nature>
+  </natures>
+</projectDescription>
\ No newline at end of file
diff --git a/office-scraper/.settings/org.eclipse.core.resources.prefs b/office-scraper/.settings/org.eclipse.core.resources.prefs
new file mode 100755
index 0000000..29abf99
--- /dev/null
+++ b/office-scraper/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/office-scraper/.settings/org.eclipse.jdt.core.prefs b/office-scraper/.settings/org.eclipse.jdt.core.prefs
new file mode 100755
index 0000000..b8947ec
--- /dev/null
+++ b/office-scraper/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/office-scraper/.settings/org.eclipse.m2e.core.prefs b/office-scraper/.settings/org.eclipse.m2e.core.prefs
new file mode 100755
index 0000000..f897a7f
--- /dev/null
+++ b/office-scraper/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/office-scraper/pom.xml b/office-scraper/pom.xml
new file mode 100644
index 0000000..8c0d821
--- /dev/null
+++ b/office-scraper/pom.xml
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.4-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.any23.plugins</groupId>
+  <artifactId>apache-any23-office-scraper</artifactId>
+
+  <name>Apache Any23 :: Plugins :: Office Scraper</name>
+  <description>Any23 plugin for scraping metadata from MS Office related file formats.</description>
+
+  <dependencies>
+    <!-- RDF4J -->
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-model</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Any23 Core. -->
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>2.4-SNAPSHOT</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Apache POI -->
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi</artifactId>
+    </dependency>
+    
+    <!-- Logging -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>${slf4j.logger.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- BEGIN: Test Dependencies -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- END: Test Dependencies -->
+
+  </dependencies>
+
+  <build>
+
+    <plugins>
+      <!-- Generates the distribution package -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <appendAssemblyId>false</appendAssemblyId>
+          <descriptors>
+            <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
+          </descriptors>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+  </profiles>
+
+</project>
diff --git a/office-scraper/src/main/assembly/LICENSE-with-deps.txt b/office-scraper/src/main/assembly/LICENSE-with-deps.txt
new file mode 100644
index 0000000..4fe12ff
--- /dev/null
+++ b/office-scraper/src/main/assembly/LICENSE-with-deps.txt
@@ -0,0 +1,212 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+APACHE ANY23 DEPENDENCIES:
+
+The Apache Any23 distribution packages include a number of dependencies with
+separate copyright notices and license terms. Your use of the source
+code for these dependencies is subject to the terms and
+conditions of the following licenses.
+
+For the Apache POI component (http://poi.apache.org/)
+This is licensed under the Apache Software License, Version 2.0, see above
diff --git a/office-scraper/src/main/assembly/NOTICE-with-deps.txt b/office-scraper/src/main/assembly/NOTICE-with-deps.txt
new file mode 100644
index 0000000..341fcb5
--- /dev/null
+++ b/office-scraper/src/main/assembly/NOTICE-with-deps.txt
@@ -0,0 +1,6 @@
+Apache Any23
+Copyright 2011-2017 The Apache Software Foundation
+Copyright 2008-2011 Digital Enterprise Research Institute (DERI)
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
diff --git a/office-scraper/src/main/assembly/README.txt b/office-scraper/src/main/assembly/README.txt
new file mode 100644
index 0000000..0b3bf28
--- /dev/null
+++ b/office-scraper/src/main/assembly/README.txt
@@ -0,0 +1,77 @@
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+${project.name} (${implementation.build}; ${maven.build.timestamp})
+
+  What is it?
+  -----------
+
+  ${project.description}
+
+  Documentation
+  -------------
+
+  The most up-to-date documentation can be found at ${project.parent.url}.
+
+  Release Notes
+  -------------
+
+  The full list of changes can be found at ${project.parent.url}/changes-report.html.
+
+  System Requirements
+  -------------------
+
+  JDK:
+    ${javac.target.version} or above. (see http://www.oracle.com/technetwork/java/)
+  Memory:
+    No minimum requirement.
+  Disk:
+    No minimum requirement.
+  Operating System:
+    No minimum requirement. On Windows, Windows NT and above or Cygwin is required for
+    the startup scripts. Tested on Windows XP, Fedora Core and Mac OS X.
+
+  Installing Apache Any23
+  -----------------------
+
+** Windows 2000/XP
+
+  1) Unzip the distribution archive, i.e. apache-${project.build.finalName}-bin.zip
+        The subdirectory apache-${project.build.finalName} will be created from the archive.
+
+  2) Copy the jar files under C:\Documents and Settings\<username>\.any23\plugins
+
+** Unix-based Operating Systems (Linux, Solaris and Mac OS X)
+
+  1) Extract the distribution archive, i.e. apache-${project.build.finalName}-bin.tar.gz.
+        The subdirectory apache-${project.build.finalName} will be created from the archive.
+
+  2) Copy the jar files under ~/.any23/plugins
+
+  Licensing
+  ---------
+
+  Please see the file called LICENSE.txt
+
+  Apache Any23 URLs
+  -----------------
+
+  Home Page:          ${project.parent.url}/
+  Downloads:          ${project.parent.url}/download.html
+  Release Notes:      ${project.parent.url}/changes-report.html
+  Mailing Lists:      ${project.parent.url}/mail-lists.html
+  Source Code:        ${project.parent.scm.url}
+  Issue Tracking:     ${project.issueManagement.url}
+  Available Plugins:  ${project.parent.url}/plugins.html
diff --git a/office-scraper/src/main/assembly/bin.xml b/office-scraper/src/main/assembly/bin.xml
new file mode 100644
index 0000000..4c85432
--- /dev/null
+++ b/office-scraper/src/main/assembly/bin.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.1 http://maven.apache.org/xsd/assembly-1.1.1.xsd">
+
+  <id>bin</id>
+  <formats>
+    <format>tar.gz</format>
+    <format>zip</format>
+  </formats>
+  <includeBaseDirectory>true</includeBaseDirectory>
+  <baseDirectory>${project.build.finalName}</baseDirectory>
+
+  <!-- Use the N&L files which apply to the included dependencies -->
+  <files>
+    <file>
+      <source>${basedir}/src/main/assembly/LICENSE-with-deps.txt</source>
+      <destName>LICENSE.txt</destName>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/src/main/assembly/NOTICE-with-deps.txt</source>
+      <destName>NOTICE.txt</destName>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/src/main/assembly/README.txt</source>
+      <filtered>true</filtered>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+    <file>
+      <source>${basedir}/../RELEASE-NOTES.txt</source>
+      <outputDirectory/>
+      <fileMode>666</fileMode>
+    </file>
+  </files>
+
+  <dependencySets>
+    <dependencySet>
+      <useProjectArtifact>true</useProjectArtifact>
+      <outputDirectory>/lib</outputDirectory>
+      <includes>
+        <include>${project.groupId}:${project.artifactId}</include>
+        <include>org.apache.poi:poi</include>
+      </includes>
+    </dependencySet>
+  </dependencySets>
+
+</assembly>
diff --git a/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java b/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
new file mode 100644
index 0000000..b879641
--- /dev/null
+++ b/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.officescraper;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.vocab.Excel;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor} able to process
+ * a <i>MS Excel 97-2007+</i> file format <i>.xls/.xlsx</i> and
+ * convert the detected content to triples.
+ * This extractor is based on
+ * <a href="http://poi.apache.org/spreadsheet/index.html">Apache POI-HSSF and POI-XSSF Java API</a>.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ExcelExtractor implements Extractor.ContentExtractor {
+
+    private static final Excel excel = Excel.getInstance();
+
+    private boolean stopAtFirstError = false;
+
+    public ExcelExtractor() {}
+
+    public boolean isStopAtFirstError() {
+        return stopAtFirstError;
+    }
+
+    @Override
+    public void setStopAtFirstError(boolean f) {
+        stopAtFirstError = f;
+    }
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return ExcelExtractorFactory.getDescriptionInstance();
+    }
+
+    /**
+     * Loads {@code in} as a workbook and writes its sheets, rows and cells to {@code er} as triples.
+     * @throws ExtractionException wrapping any exception raised while loading or converting the workbook.
+     */
+    @Override
+    public void run(ExtractionParameters extractionParameters, ExtractionContext context,
+            InputStream in, ExtractionResult er) throws IOException, ExtractionException {
+        try {
+            final IRI documentIRI = context.getDocumentIRI();
+            // try-with-resources: the workbook is released even when processing fails midway.
+            try (Workbook workbook = createWorkbook(documentIRI, in)) {
+                processWorkbook(documentIRI, workbook, er);
+            }
+        } catch (Exception e) {
+            throw new ExtractionException("An error occurred while extracting MS Excel content.", e);
+        }
+    }
+
+    // TODO: this should be done by Tika, the extractors should be split.
+    // The format is chosen from the document IRI suffix alone; the stream content is not inspected.
+    private Workbook createWorkbook(IRI document, InputStream is) throws IOException {
+        final String documentIRI = document.toString();
+        if (documentIRI.endsWith(".xlsx")) {
+            return new XSSFWorkbook(is);
+        } else if (documentIRI.endsWith("xls")) {
+            return new HSSFWorkbook(is);
+        } else {
+            throw new IllegalArgumentException("Unsupported extension for resource [" + documentIRI + "]");
+        }
+    }
+
+    private void processWorkbook(IRI documentIRI, Workbook wb, ExtractionResult er) {
+        for (int sheetIndex = 0; sheetIndex < wb.getNumberOfSheets(); sheetIndex++) {
+            final Sheet sheet = wb.getSheetAt(sheetIndex);
+            final IRI sheetIRI = getSheetIRI(documentIRI, sheet);
+            er.writeTriple(documentIRI, excel.containsSheet, sheetIRI);
+            er.writeTriple(sheetIRI, RDF.TYPE, excel.sheet);
+            writeSheetMetadata(sheetIRI, sheet, er);
+            for (Row row : sheet) {
+                final IRI rowIRI = getRowIRI(sheetIRI, row);
+                er.writeTriple(sheetIRI, excel.containsRow, rowIRI);
+                er.writeTriple(rowIRI, RDF.TYPE, excel.row);
+                writeRowMetadata(rowIRI, row, er);
+                for (Cell cell : row) {
+                    writeCell(rowIRI, cell, er);
+                }
+            }
+        }
+    }
+
+    private void writeSheetMetadata(IRI sheetIRI, Sheet sheet, ExtractionResult er) {
+        er.writeTriple(sheetIRI, excel.sheetName, RDFUtils.literal(sheet.getSheetName()));
+        er.writeTriple(sheetIRI, excel.firstRow, RDFUtils.literal(sheet.getFirstRowNum()));
+        er.writeTriple(sheetIRI, excel.lastRow, RDFUtils.literal(sheet.getLastRowNum()));
+    }
+
+    private void writeRowMetadata(IRI rowIRI, Row row, ExtractionResult er) {
+        // Keep the int locals: POI returns short here, and inlining could select another literal overload.
+        final int    firstCellNum = row.getFirstCellNum();
+        final int    lastCellNum  = row.getLastCellNum();
+        er.writeTriple(rowIRI, excel.firstCell , RDFUtils.literal(firstCellNum));
+        er.writeTriple(rowIRI, excel.lastCell  , RDFUtils.literal(lastCellNum ));
+    }
+
+    // Writes the cell node and its typed value; cells that are not string, boolean or numeric are skipped.
+    private void writeCell(IRI rowIRI, Cell cell, ExtractionResult er) {
+        final CellType type = cell.getCellType();
+        final IRI cellType = cellTypeToType(type);
+        if (cellType == null)
+            return; // Skip unsupported cells.
+        // getStringCellValue() throws IllegalStateException on numeric and boolean cells,
+        // so the lexical value is read through the accessor matching the cell type.
+        final String value;
+        if (type == CellType.BOOLEAN) {
+            value = String.valueOf(cell.getBooleanCellValue());
+        } else if (type == CellType.NUMERIC) {
+            value = String.valueOf(cell.getNumericCellValue());
+        } else {
+            value = cell.getStringCellValue();
+        }
+        final IRI cellIRI = getCellIRI(rowIRI, cell);
+        er.writeTriple(rowIRI, excel.containsCell, cellIRI);
+        er.writeTriple(cellIRI, RDF.TYPE, excel.cell);
+        er.writeTriple(cellIRI, excel.cellValue, RDFUtils.literal(value, cellType));
+    }
+
+    private IRI getSheetIRI(IRI documentIRI, Sheet sheet) {
+        return RDFUtils.iri(documentIRI.toString() + "/sheet/" + sheet.getSheetName());
+    }
+
+    private IRI getRowIRI(IRI sheetIRI, Row row) {
+        return RDFUtils.iri(sheetIRI.toString() + "/" + row.getRowNum());
+    }
+
+    private IRI getCellIRI(IRI rowIRI, Cell cell) {
+        // Plain concatenation always yields ASCII digits, whatever the default locale is.
+        return RDFUtils.iri(rowIRI + "/" + cell.getColumnIndex() + "/");
+    }
+
+    // Maps a POI cell type onto a datatype IRI in the vocabulary namespace; null means "unsupported".
+    private IRI cellTypeToType(CellType cellType) {
+        final String postfix;
+        if (cellType == CellType.STRING) {
+            postfix = "string";
+        } else if (cellType == CellType.BOOLEAN) {
+            postfix = "boolean";
+        } else if (cellType == CellType.NUMERIC) {
+            postfix = "numeric";
+        } else {
+            return null;
+        }
+        return RDFUtils.iri(excel.getNamespace().toString() + postfix);
+    }
+}
diff --git a/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java b/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java
new file mode 100644
index 0000000..2515bf0
--- /dev/null
+++ b/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.officescraper;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Peter Ansell p_ansell@yahoo.com
+ *
+ */
+public class ExcelExtractorFactory extends SimpleExtractorFactory<ExcelExtractor> implements
+        ExtractorFactory<ExcelExtractor> {
+
+    public static final String NAME = "excel";
+    public static final Prefixes PREFIXES = null;
+
+    private static final ExtractorDescription descriptionInstance = new ExcelExtractorFactory();
+
+    /** Registers the factory under {@link #NAME} for the Excel-related MIME types, each with q=0.1. */
+    public ExcelExtractorFactory() {
+        super(NAME, PREFIXES, excelMimeTypeSpecs(), null);
+    }
+
+    private static java.util.List<String> excelMimeTypeSpecs() {
+        return Arrays.asList(
+                "application/vnd.ms-excel;q=0.1",
+                "application/msexcel;q=0.1",
+                "application/x-msexcel;q=0.1",
+                "application/x-ms-excel;q=0.1",
+                "application/x-tika-ooxml;q=0.1",
+                "application/x-tika-msoffice;q=0.1",
+                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;q=0.1"
+        );
+    }
+
+    @Override
+    public ExcelExtractor createExtractor() {
+        return new ExcelExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}
diff --git a/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/package-info.java b/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/package-info.java
new file mode 100644
index 0000000..17fc3cc
--- /dev/null
+++ b/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package defines a set of {@link org.apache.any23.extractor.Extractor}s
+ * able to extract metadata from various <i>Microsoft Office</i> documents.
+ */
+package org.apache.any23.plugin.officescraper;
\ No newline at end of file
diff --git a/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java b/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java
new file mode 100644
index 0000000..7fc5bbc
--- /dev/null
+++ b/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.eclipse.rdf4j.model.IRI;
+
+/**
+ * The <i>MS Excel</i> extractor vocabulary.
+ *
+ * @see org.apache.any23.plugin.officescraper.ExcelExtractor
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class Excel extends Vocabulary {
+
+    public static final String SHEET = "sheet";
+    public static final String ROW   = "row";
+    public static final String CELL  = "cell";
+
+    public static final String CONTAINS_SHEET = "containsSheet";
+    public static final String CONTAINS_ROW   = "containsRow";
+    public static final String CONTAINS_CELL  = "containsCell";
+    public static final String CELL_VALUE     = "cellValue";
+
+    // Local names of the sheet and row metadata properties.
+    public static final String SHEET_NAME = "sheetName";
+    public static final String FIRST_ROW  = "firstRow";
+    public static final String LAST_ROW   = "lastRow";
+    public static final String FIRST_CELL = "firstCell";
+    public static final String LAST_CELL  = "lastCell";
+
+    /**
+     * This property links the identifier of a <i>document</i> to the identifier of a <i>sheet</i>.
+     */
+    public final IRI containsSheet = createProperty(CONTAINS_SHEET);
+
+    /**
+     * This property links the identifier of a <i>sheet</i> to the identifier of a <i>row</i>.
+     */
+    public final IRI containsRow = createProperty(CONTAINS_ROW);
+
+    /**
+     * This property links the identifier of a <i>row</i> to the identifier of a <i>cell</i>.
+     */
+    public final IRI containsCell = createProperty(CONTAINS_CELL);
+
+    /**
+     * This property links the identifier of a <i>Sheet</i> to the name of the sheet.
+     */
+    public final IRI sheetName = createProperty(SHEET_NAME);
+
+    /**
+     * This property links the identifier of a <i>Sheet</i> to the index of the first declared row.
+     */
+    public final IRI firstRow = createProperty(FIRST_ROW);
+
+    /**
+     * This property links the identifier of a <i>Sheet</i> to the index of the last declared row.
+     */
+    public final IRI lastRow = createProperty(LAST_ROW);
+
+    /**
+     * This property links the identifier of a <i>Row</i> to the index of the first declared cell.
+     */
+    public final IRI firstCell = createProperty(FIRST_CELL);
+
+    /**
+     * This property links the identifier of a <i>Row</i> to the index of the last declared cell.
+     */
+    public final IRI lastCell = createProperty(LAST_CELL);
+
+    /**
+     * This property links the identifier of a <i>cell</i> to the content of the cell.
+     */
+    public final IRI cellValue = createProperty(CELL_VALUE);
+
+    // Resource types; all three are created through createResource(String).
+
+    /**
+     * This resource identifies a <i>Sheet</i>.
+     */
+    public final IRI sheet = createResource(SHEET);
+
+    /**
+     * This resource identifies a <i>row</i>.
+     */
+    public final IRI row = createResource(ROW);
+
+    /**
+     * This resource identifies a <i>cell</i>.
+     */
+    public final IRI cell = createResource(CELL);
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://any23.apache.org/excel/";
+
+    private static final class InstanceHolder {
+        private static final Excel instance = new Excel();
+    }
+
+    public static Excel getInstance() {
+        return InstanceHolder.instance;
+    }
+
+    /**
+     * Delegates to the inherited two-argument {@code createProperty}, using {@link #NS} as the namespace.
+     * NOTE(review): resources therefore go through the property factory of the base class; confirm this is intended.
+     * @param localName resource label to create
+     * @return the new IRI instance.
+     */
+    public IRI createResource(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    /**
+     * Delegates to the inherited two-argument {@code createProperty}, using {@link #NS} as the namespace.
+     * @param localName property label to create
+     * @return the new IRI instance.
+     */
+    public IRI createProperty(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    private Excel() {
+        super(NS);
+    }
+
+}
diff --git a/office-scraper/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/office-scraper/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
new file mode 100644
index 0000000..1b4576c
--- /dev/null
+++ b/office-scraper/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -0,0 +1 @@
+org.apache.any23.plugin.officescraper.ExcelExtractorFactory
\ No newline at end of file
diff --git a/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java b/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
new file mode 100644
index 0000000..3edebbf
--- /dev/null
+++ b/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.officescraper;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractionResultImpl;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.vocab.Excel;
+import org.apache.any23.writer.CompositeTripleHandler;
+import org.apache.any23.writer.CountingTripleHandler;
+import org.apache.any23.writer.NTriplesWriter;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Test case for {@link ExcelExtractor}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ExcelExtractorTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(ExcelExtractorTest.class);
+
+    private ExcelExtractor extractor;
+
+    @Before
+    public void setUp() {
+        extractor = new ExcelExtractorFactory().createExtractor();
+    }
+
+    @Test
+    public void testGetDescription() {
+        Assert.assertNotNull( extractor.getDescription() );
+    }
+
+    @Test
+    public void testExtractXLSX() throws IOException, ExtractionException, TripleHandlerException {
+        processFile("test1-workbook.xlsx");
+    }
+
+    @Test
+    public void testExtractXLS() throws IOException, ExtractionException, TripleHandlerException {
+        processFile("test2-workbook.xls");
+    }
+
+    /**
+     * Runs the extractor on a classpath workbook and checks the emitted structure:
+     * 2 sheets, 6 rows and 18 cells, both as containment links and as typed nodes.
+     * @param resource workbook file name, resolved relative to this test class.
+     */
+    private void processFile(String resource) throws IOException, ExtractionException, TripleHandlerException {
+        final ExtractionParameters extractionParameters = ExtractionParameters.newDefault();
+        final ExtractionContext extractionContext = new ExtractionContext(
+                extractor.getDescription().getExtractorName(),
+                RDFUtils.iri("file://" + resource)
+        );
+        final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
+        final TripleHandler verifierTripleHandler = Mockito.mock(TripleHandler.class);
+        compositeTripleHandler.addChild(verifierTripleHandler);
+        final CountingTripleHandler countingTripleHandler = new CountingTripleHandler();
+        compositeTripleHandler.addChild(countingTripleHandler);
+        final ByteArrayOutputStream out = new ByteArrayOutputStream();
+        compositeTripleHandler.addChild( new NTriplesWriter(out) );
+        final ExtractionResult extractionResult = new ExtractionResultImpl(
+                extractionContext, extractor, compositeTripleHandler
+        );
+        // The stream is closed even when extraction fails; a missing fixture is reported explicitly.
+        try (InputStream is = this.getClass().getResourceAsStream(resource)) {
+            Assert.assertNotNull("Test resource not found: " + resource, is);
+            extractor.run(extractionParameters, extractionContext, is, extractionResult);
+        }
+        compositeTripleHandler.close();
+        logger.debug(out.toString());
+
+        verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsSheet, 2 );
+        verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsRow  , 6 );
+        verifyPredicateOccurrence(verifierTripleHandler, Excel.getInstance().containsCell , 18);
+
+        verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().sheet, 2 );
+        verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().row  , 6 );
+        verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().cell , 18);
+    }
+
+    // Asserts that exactly 'occurrence' triples with the given predicate reached the mock.
+    private void verifyPredicateOccurrence(TripleHandler mock, IRI predicate, int occurrence)
+    throws TripleHandlerException {
+        Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple(
+                Mockito.any(), Mockito.eq(predicate), Mockito.any(), Mockito.any(), Mockito.any()
+        );
+    }
+
+    // Asserts that exactly 'occurrence' rdf:type triples with the given object reached the mock.
+    private void verifyTypeOccurrence(TripleHandler mock, IRI type, int occurrence)
+    throws TripleHandlerException {
+        Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple(
+                Mockito.any(), Mockito.eq(RDF.TYPE), Mockito.eq(type), Mockito.any(), Mockito.any()
+        );
+    }
+
+}
diff --git a/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/XSSFWorkbookTest.java b/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/XSSFWorkbookTest.java
new file mode 100644
index 0000000..ebb8da1
--- /dev/null
+++ b/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/XSSFWorkbookTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.plugin.officescraper;
+
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * General test to verify usability of the {@link XSSFWorkbook} class.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class XSSFWorkbookTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(XSSFWorkbookTest.class);
+
+    @Test
+    public void testXLSXFormatAccess() throws IOException {
+        verifyResource("test1-workbook.xlsx");
+    }
+
+    @Test
+    public void testXLSFormatAccess() throws IOException {
+        verifyResource("test2-workbook.xls");
+    }
+
+    private void verifyResource(String resource) throws IOException {
+        final InputStream document = this.getClass().getResourceAsStream(resource);
+        final Workbook wb;
+        if(resource.endsWith(".xlsx")) {
+            wb = new XSSFWorkbook(document);
+        } else if(resource.endsWith("xls")) {
+            wb = new HSSFWorkbook(document);
+        } else {
+            throw new IllegalArgumentException("Unsupported extension for resource " + resource);
+        }
+        Assert.assertEquals(2, wb.getNumberOfSheets());
+        Sheet sheet;
+        for (int sheetIndex = 0; sheetIndex < wb.getNumberOfSheets(); sheetIndex++) {
+            sheet = wb.getSheetAt(sheetIndex);
+            int rowcount = 0;
+            for (Row row : sheet) {
+                rowcount++;
+                int cellcount = 0;
+                for (Cell cell : row) {
+                    cellcount++;
+                    logger.debug(
+                            String.format(
+                                    "cell [%d, %d]: %s",
+                                    cell.getRowIndex(),
+                                    cell.getColumnIndex(),
+                                    cell.getStringCellValue()
+                            )
+                    );
+                    verifyContent(sheetIndex, cell.getRowIndex(), cell.getColumnIndex(), cell.getStringCellValue());
+                }
+                Assert.assertEquals(3, cellcount);
+            }
+            Assert.assertEquals(3, rowcount);
+        }
+    }
+
+    private void verifyContent(int sheet, int row, int col, String content) {
+        Assert.assertEquals(
+                String.format("%s %d.%d", sheet == 0 ? "a" : "b", row + 1, col + 1),
+                content
+        );
+    }
+
+}
diff --git a/office-scraper/src/test/resources/log4j.properties b/office-scraper/src/test/resources/log4j.properties
new file mode 100644
index 0000000..1a9ad34
--- /dev/null
+++ b/office-scraper/src/test/resources/log4j.properties
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+log4j.rootCategory=INFO, O
+
+# Stdout
+log4j.appender.O=org.apache.log4j.ConsoleAppender
+
+# File (appender R is disabled, so every R.* setting below stays commented out with it)
+#log4j.appender.R=org.apache.log4j.RollingFileAppender
+#log4j.appender.R.File=log4j.log
+
+# Control the maximum log file size
+#log4j.appender.R.MaxFileSize=100KB
+
+# Archive log files (one backup file here)
+#log4j.appender.R.MaxBackupIndex=1
+
+#log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.O.layout=org.apache.log4j.PatternLayout
+
+#log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
+log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
diff --git a/office-scraper/src/test/resources/org/apache/any23/plugin/officescraper/test1-workbook.xlsx b/office-scraper/src/test/resources/org/apache/any23/plugin/officescraper/test1-workbook.xlsx
new file mode 100644
index 0000000..4cb187f
Binary files /dev/null and b/office-scraper/src/test/resources/org/apache/any23/plugin/officescraper/test1-workbook.xlsx differ
diff --git a/office-scraper/src/test/resources/org/apache/any23/plugin/officescraper/test2-workbook.xls b/office-scraper/src/test/resources/org/apache/any23/plugin/officescraper/test2-workbook.xls
new file mode 100644
index 0000000..cb385a1
Binary files /dev/null and b/office-scraper/src/test/resources/org/apache/any23/plugin/officescraper/test2-workbook.xls differ
diff --git a/openie/.classpath b/openie/.classpath
new file mode 100755
index 0000000..5c6985f
--- /dev/null
+++ b/openie/.classpath
@@ -0,0 +1,239 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+  <classpathentry kind="src" path="src/test/java" output="target/test-classes" including="**/*.java"/>
+  <classpathentry kind="src" path="src/main/java" including="**/*.java"/>
+  <classpathentry kind="src" path="src/main/resources" excluding="**/*.java"/>
+  <classpathentry kind="src" path="target/maven-shared-archive-resources" excluding="**/*.java"/>
+  <classpathentry kind="output" path="target/classes"/>
+  <classpathentry kind="var" path="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0.jar" sourcepath="M2_REPO/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2.jar" sourcepath="M2_REPO/javax/annotation/javax.annotation-api/1.3.2/javax.annotation-api-1.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/measure/unit-api/1.0/unit-api-1.0.jar" sourcepath="M2_REPO/javax/measure/unit-api/1.0/unit-api-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/javax/inject/javax.inject/1/javax.inject-1.jar" sourcepath="M2_REPO/javax/inject/javax.inject/1/javax.inject-1-sources.jar"/>
+  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
+  <classpathentry kind="src" path="/apache-any23-core"/>
+  <classpathentry kind="src" path="/apache-any23-api"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.7.28/slf4j-api-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-model/3.0.0/rdf4j-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-util/3.0.0/rdf4j-util-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-api/3.0.0/rdf4j-rio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-csvutils"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6.jar" sourcepath="M2_REPO/org/apache/commons/commons-csv/1.6/commons-csv-1.6-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-mime"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-turtle/3.0.0/rdf4j-rio-turtle-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-datatypes/3.0.0/rdf4j-rio-datatypes-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-languages/3.0.0/rdf4j-rio-languages-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6.jar" sourcepath="M2_REPO/commons-io/commons-io/2.6/commons-io-2.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-ntriples/3.0.0/rdf4j-rio-ntriples-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-n3/3.0.0/rdf4j-rio-n3-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-nquads/3.0.0/rdf4j-rio-nquads-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-core/1.22/tika-core-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22.jar" sourcepath="M2_REPO/org/apache/tika/tika-parsers/1.22/tika-parsers-1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-runtime/2.3.2/jaxb-runtime-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2.jar" sourcepath="M2_REPO/jakarta/xml/bind/jakarta.xml.bind-api/2.3.2/jakarta.xml.bind-api-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/txw2/2.3.2/txw2-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-runtime/3.0.8/istack-commons-runtime-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1.jar" sourcepath="M2_REPO/org/jvnet/staxex/stax-ex/1.8.1/stax-ex-1.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16.jar" sourcepath="M2_REPO/com/sun/xml/fastinfoset/FastInfoset/1.2.16/FastInfoset-1.2.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1.jar" sourcepath="M2_REPO/com/sun/activation/jakarta.activation/1.2.1/jakarta.activation-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0.jar" sourcepath="M2_REPO/xerces/xercesImpl/2.12.0/xercesImpl-2.12.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar" sourcepath="M2_REPO/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9.jar" sourcepath="M2_REPO/org/apache/commons/commons-lang3/3.9/commons-lang3-3.9-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/gagravarr/vorbis-java-tika/0.8/vorbis-java-tika-0.8.jar" sourcepath="M2_REPO/org/gagravarr/vorbis-java-tika/0.8/vorbis-java-tika-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/tallison/jmatio/1.5/jmatio-1.5.jar" sourcepath="M2_REPO/org/tallison/jmatio/1.5/jmatio-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-core/0.8.3/apache-mime4j-core-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3.jar" sourcepath="M2_REPO/org/apache/james/apache-mime4j-dom/0.8.3/apache-mime4j-dom-0.8.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18.jar" sourcepath="M2_REPO/org/apache/commons/commons-compress/1.18/commons-compress-1.18-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/tukaani/xz/1.8/xz-1.8.jar" sourcepath="M2_REPO/org/tukaani/xz/1.8/xz-1.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11.jar" sourcepath="M2_REPO/com/epam/parso/2.0.11/parso-2.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2.jar" sourcepath="M2_REPO/org/brotli/dec/0.1.2/dec-0.1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.11/commons-codec-1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/pdfbox/2.0.16/pdfbox-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/pdfbox/2.0.16/pdfbox-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/fontbox/2.0.16/fontbox-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/fontbox/2.0.16/fontbox-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/pdfbox-tools/2.0.16/pdfbox-tools-2.0.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/pdfbox-tools/2.0.16/pdfbox-tools-2.0.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jempbox/1.8.16/jempbox-1.8.16.jar" sourcepath="M2_REPO/org/apache/pdfbox/jempbox/1.8.16/jempbox-1.8.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcmail-jdk15on/1.62/bcmail-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcmail-jdk15on/1.62/bcmail-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcprov-jdk15on/1.62/bcprov-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcprov-jdk15on/1.62/bcprov-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/bouncycastle/bcpkix-jdk15on/1.62/bcpkix-jdk15on-1.62.jar" sourcepath="M2_REPO/org/bouncycastle/bcpkix-jdk15on/1.62/bcpkix-jdk15on-1.62-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi/4.1.0/poi-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar" sourcepath="M2_REPO/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-scratchpad/4.1.0/poi-scratchpad-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0.jar" sourcepath="M2_REPO/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar" sourcepath="M2_REPO/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06.jar" sourcepath="M2_REPO/com/github/virtuald/curvesapi/1.06/curvesapi-1.06-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/healthmarketscience/jackcess/jackcess/3.0.1/jackcess-3.0.1.jar" sourcepath="M2_REPO/com/healthmarketscience/jackcess/jackcess/3.0.1/jackcess-3.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/healthmarketscience/jackcess/jackcess-encrypt/3.0.0/jackcess-encrypt-3.0.0.jar" sourcepath="M2_REPO/com/healthmarketscience/jackcess/jackcess-encrypt/3.0.0/jackcess-encrypt-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1.jar" sourcepath="M2_REPO/org/ccil/cowan/tagsoup/tagsoup/1.2.1/tagsoup-1.2.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/ow2/asm/asm/7.2-beta/asm-7.2-beta.jar" sourcepath="M2_REPO/org/ow2/asm/asm/7.2-beta/asm-7.2-beta-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/mp4parser/isoparser/1.1.22/isoparser-1.1.22.jar" sourcepath="M2_REPO/com/googlecode/mp4parser/isoparser/1.1.22/isoparser-1.1.22-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/drewnoakes/metadata-extractor/2.11.0/metadata-extractor-2.11.0.jar" sourcepath="M2_REPO/com/drewnoakes/metadata-extractor/2.11.0/metadata-extractor-2.11.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/adobe/xmp/xmpcore/5.1.3/xmpcore-5.1.3.jar" sourcepath="M2_REPO/com/adobe/xmp/xmpcore/5.1.3/xmpcore-5.1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/de/l3s/boilerpipe/boilerpipe/1.1.0/boilerpipe-1.1.0.jar" sourcepath="M2_REPO/de/l3s/boilerpipe/boilerpipe/1.1.0/boilerpipe-1.1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/rometools/rome/1.12.1/rome-1.12.1.jar" sourcepath="M2_REPO/com/rometools/rome/1.12.1/rome-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/rometools/rome-utils/1.12.1/rome-utils-1.12.1.jar" sourcepath="M2_REPO/com/rometools/rome-utils/1.12.1/rome-utils-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6.jar" sourcepath="M2_REPO/org/jdom/jdom2/2.0.6/jdom2-2.0.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/gagravarr/vorbis-java-core/0.8/vorbis-java-core-0.8.jar" sourcepath="M2_REPO/org/gagravarr/vorbis-java-core/0.8/vorbis-java-core-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/juniversalchardet/juniversalchardet/1.0.3/juniversalchardet-1.0.3.jar" sourcepath="M2_REPO/com/googlecode/juniversalchardet/juniversalchardet/1.0.3/juniversalchardet-1.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codelibs/jhighlight/1.0.3/jhighlight-1.0.3.jar" sourcepath="M2_REPO/org/codelibs/jhighlight/1.0.3/jhighlight-1.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/pff/java-libpst/0.8.1/java-libpst-0.8.1.jar" sourcepath="M2_REPO/com/pff/java-libpst/0.8.1/java-libpst-0.8.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/junrar/junrar/4.0.0/junrar-4.0.0.jar" sourcepath="M2_REPO/com/github/junrar/junrar/4.0.0/junrar-4.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-rs-client/3.3.2/cxf-rt-rs-client-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-rs-client/3.3.2/cxf-rt-rs-client-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-transports-http/3.3.2/cxf-rt-transports-http-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-transports-http/3.3.2/cxf-rt-transports-http-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-core/3.3.2/cxf-core-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-core/3.3.2/cxf-core-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3.jar" sourcepath="M2_REPO/com/fasterxml/woodstox/woodstox-core/5.0.3/woodstox-core-5.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4.jar" sourcepath="M2_REPO/org/codehaus/woodstox/stax2-api/3.1.4/stax2-api-3.1.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ws/xmlschema/xmlschema-core/2.2.4/xmlschema-core-2.2.4.jar" sourcepath="M2_REPO/org/apache/ws/xmlschema/xmlschema-core/2.2.4/xmlschema-core-2.2.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/jaxb-xjc/2.3.2/jaxb-xjc-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/jaxb-xjc/2.3.2/jaxb-xjc-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/xsom/2.3.2/xsom-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/xsom/2.3.2/xsom-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/bind/external/relaxng-datatype/2.3.2/relaxng-datatype-2.3.2.jar" sourcepath="M2_REPO/com/sun/xml/bind/external/relaxng-datatype/2.3.2/relaxng-datatype-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/glassfish/jaxb/codemodel/2.3.2/codemodel-2.3.2.jar" sourcepath="M2_REPO/org/glassfish/jaxb/codemodel/2.3.2/codemodel-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/bind/external/rngom/2.3.2/rngom-2.3.2.jar" sourcepath="M2_REPO/com/sun/xml/bind/external/rngom/2.3.2/rngom-2.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/xml/dtd-parser/dtd-parser/1.4.1/dtd-parser-1.4.1.jar" sourcepath="M2_REPO/com/sun/xml/dtd-parser/dtd-parser/1.4.1/dtd-parser-1.4.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/sun/istack/istack-commons-tools/3.0.8/istack-commons-tools-3.0.8.jar" sourcepath="M2_REPO/com/sun/istack/istack-commons-tools/3.0.8/istack-commons-tools-3.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ant/ant/1.10.5/ant-1.10.5.jar" sourcepath="M2_REPO/org/apache/ant/ant/1.10.5/ant-1.10.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/ant/ant-launcher/1.10.5/ant-launcher-1.10.5.jar" sourcepath="M2_REPO/org/apache/ant/ant-launcher/1.10.5/ant-launcher-1.10.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-frontend-jaxrs/3.3.2/cxf-rt-frontend-jaxrs-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-frontend-jaxrs/3.3.2/cxf-rt-frontend-jaxrs-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/jakarta/ws/rs/jakarta.ws.rs-api/2.1.5/jakarta.ws.rs-api-2.1.5.jar" sourcepath="M2_REPO/jakarta/ws/rs/jakarta.ws.rs-api/2.1.5/jakarta.ws.rs-api-2.1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/cxf/cxf-rt-security/3.3.2/cxf-rt-security-3.3.2.jar" sourcepath="M2_REPO/org/apache/cxf/cxf-rt-security/3.3.2/cxf-rt-security-3.3.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-exec/1.3/commons-exec-1.3.jar" sourcepath="M2_REPO/org/apache/commons/commons-exec/1.3/commons-exec-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/opennlp/opennlp-tools/1.5.3/opennlp-tools-1.5.3.jar" sourcepath="M2_REPO/org/apache/opennlp/opennlp-tools/1.5.3/opennlp-tools-1.5.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1.jar" sourcepath="M2_REPO/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11.jar" sourcepath="M2_REPO/com/github/openjson/openjson/1.0.11/openjson-1.0.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/gson/gson/2.8.5/gson-2.8.5.jar" sourcepath="M2_REPO/com/google/code/gson/gson/2.8.5/gson-2.8.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jul-to-slf4j/1.7.28/jul-to-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/jcl-over-slf4j/1.7.28/jcl-over-slf4j-1.7.28-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/netcdf4/4.5.5/netcdf4-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/netcdf4/4.5.5/netcdf4-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/cdm/4.5.5/cdm-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/cdm/4.5.5/cdm-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/udunits/4.5.5/udunits-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/udunits/4.5.5/udunits-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/joda-time/joda-time/2.2/joda-time-2.2.jar" sourcepath="M2_REPO/joda-time/joda-time/2.2/joda-time-2.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/jcip/jcip-annotations/1.0/jcip-annotations-1.0.jar" sourcepath="M2_REPO/net/jcip/jcip-annotations/1.0/jcip-annotations-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/httpservices/4.5.5/httpservices-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/httpservices/4.5.5/httpservices-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient/4.5.10/httpclient-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpcore/4.4.12/httpcore-4.4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpmime/4.5.10/httpmime-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/quartz-scheduler/quartz/2.2.0/quartz-2.2.0.jar" sourcepath="M2_REPO/org/quartz-scheduler/quartz/2.2.0/quartz-2.2.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0.jar" sourcepath="M2_REPO/com/google/protobuf/protobuf-java/3.9.0/protobuf-java-3.9.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/ehcache/ehcache-core/2.6.2/ehcache-core-2.6.2.jar" sourcepath="M2_REPO/net/sf/ehcache/ehcache-core/2.6.2/ehcache-core-2.6.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/itadaki/bzip2/0.9.1/bzip2-0.9.1.jar" sourcepath="M2_REPO/org/itadaki/bzip2/0.9.1/bzip2-0.9.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72.jar" sourcepath="M2_REPO/com/beust/jcommander/1.72/jcommander-1.72-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/guava/guava/15.0/guava-15.0.jar" sourcepath="M2_REPO/com/google/guava/guava/15.0/guava-15.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/google/code/findbugs/jsr305/2.0.1/jsr305-2.0.1.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/ucar/grib/4.5.5/grib-4.5.5.jar" sourcepath="M2_REPO/edu/ucar/grib/4.5.5/grib-4.5.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1.jar" sourcepath="M2_REPO/net/java/dev/jna/jna/5.3.1/jna-5.3.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1.jar" sourcepath="M2_REPO/org/jsoup/jsoup/1.12.1/jsoup-1.12.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4.jar" sourcepath="M2_REPO/com/mchange/c3p0/0.9.5.4/c3p0-0.9.5.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15.jar" sourcepath="M2_REPO/com/mchange/mchange-commons-java/0.2.15/mchange-commons-java-0.2.15-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-utility/0.8/sis-utility-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-utility/0.8/sis-utility-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/opengis/geoapi/3.0.1/geoapi-3.0.1.jar" sourcepath="M2_REPO/org/opengis/geoapi/3.0.1/geoapi-3.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/storage/sis-netcdf/0.8/sis-netcdf-0.8.jar" sourcepath="M2_REPO/org/apache/sis/storage/sis-netcdf/0.8/sis-netcdf-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/storage/sis-storage/0.8/sis-storage-0.8.jar" sourcepath="M2_REPO/org/apache/sis/storage/sis-storage/0.8/sis-storage-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-feature/0.8/sis-feature-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-feature/0.8/sis-feature-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-metadata/0.8/sis-metadata-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-metadata/0.8/sis-metadata-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/sis/core/sis-referencing/0.8/sis-referencing-0.8.jar" sourcepath="M2_REPO/org/apache/sis/core/sis-referencing/0.8/sis-referencing-0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/usc/ir/sentiment-analysis-parser/0.1/sentiment-analysis-parser-0.1.jar" sourcepath="M2_REPO/edu/usc/ir/sentiment-analysis-parser/0.1/sentiment-analysis-parser-0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-core/2.9.10/jackson-core-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-databind/2.9.10/jackson-databind-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10.jar" sourcepath="M2_REPO/com/fasterxml/jackson/core/jackson-annotations/2.9.10/jackson-annotations-2.9.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2.jar" sourcepath="M2_REPO/org/apache/pdfbox/jbig2-imageio/3.0.2/jbig2-imageio-3.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0.jar" sourcepath="M2_REPO/com/github/jai-imageio/jai-imageio-core/1.4.0/jai-imageio-core-1.4.0-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-encoding"/>
+  <classpathentry kind="var" path="M2_REPO/org/rypt/f8/1.1/f8-1.1.jar" sourcepath="M2_REPO/org/rypt/f8/1.1/f8-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10.jar" sourcepath="M2_REPO/org/apache/httpcomponents/httpclient-cache/4.5.10/httpclient-cache-4.5.10-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3.jar" sourcepath="M2_REPO/net/sf/biweekly/biweekly/0.6.3/biweekly-0.6.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2.jar" sourcepath="M2_REPO/com/github/mangstadt/vinnie/2.0.2/vinnie-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-binary/3.0.0/rdf4j-rio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfjson/3.0.0/rdf4j-rio-rdfjson-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-rdfxml/3.0.0/rdf4j-rio-rdfxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trix/3.0.0/rdf4j-rio-trix-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-trig/3.0.0/rdf4j-rio-trig-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-rio-jsonld/3.0.0/rdf4j-rio-jsonld-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5.jar" sourcepath="M2_REPO/com/github/jsonld-java/jsonld-java/0.12.5/jsonld-java-0.12.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sail/3.0.0/rdf4j-repository-sail-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-api/3.0.0/rdf4j-repository-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-query/3.0.0/rdf4j-query-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-api/3.0.0/rdf4j-sail-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-model/3.0.0/rdf4j-queryalgebra-model-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-client/3.0.0/rdf4j-http-client-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-http-protocol/3.0.0/rdf4j-http-protocol-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-api/3.0.0/rdf4j-queryresultio-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-binary/3.0.0/rdf4j-queryresultio-binary-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-api/3.0.0/rdf4j-queryparser-api-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryalgebra-evaluation/3.0.0/rdf4j-queryalgebra-evaluation-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-repository-sparql/3.0.0/rdf4j-repository-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryparser-sparql/3.0.0/rdf4j-queryparser-sparql-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-queryresultio-sparqlxml/3.0.0/rdf4j-queryresultio-sparqlxml-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8.jar" sourcepath="M2_REPO/org/mapdb/mapdb/1.0.8/mapdb-1.0.8-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-memory/3.0.0/rdf4j-sail-memory-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0.jar" sourcepath="M2_REPO/org/eclipse/rdf4j/rdf4j-sail-base/3.0.0/rdf4j-sail-base-3.0.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf4j/0.7/semargl-rdf4j-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-core/0.7/semargl-core-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdfa/0.7/semargl-rdfa-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7.jar" sourcepath="M2_REPO/org/semarglproject/semargl-rdf/0.7/semargl-rdf-0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-rio/5.1.11/owlapi-rio-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-parsers/5.1.11/owlapi-parsers-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-api/5.1.11/owlapi-api-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0.jar" sourcepath="M2_REPO/org/apache/commons/commons-rdf-api/0.5.0/commons-rdf-api-0.5.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5.jar" sourcepath="M2_REPO/com/github/vsonnier/hppcrt/0.7.5/hppcrt-0.7.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1.jar" sourcepath="M2_REPO/com/github/ben-manes/caffeine/caffeine/2.6.1/caffeine-2.6.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-apibinding/5.1.11/owlapi-apibinding-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-impl/5.1.11/owlapi-impl-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-oboformat/5.1.11/owlapi-oboformat-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11.jar" sourcepath="M2_REPO/net/sourceforge/owlapi/owlapi-tools/5.1.11/owlapi-tools-5.1.11-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23.jar" sourcepath="M2_REPO/org/yaml/snakeyaml/1.23/snakeyaml-1.23-sources.jar"/>
+  <classpathentry kind="src" path="/apache-any23-test-resources"/>
+  <classpathentry kind="var" path="M2_REPO/org/allenai/openie/openie_2.11/4.2.6/openie_2.11-4.2.6.jar" sourcepath="M2_REPO/org/allenai/openie/openie_2.11/4.2.6/openie_2.11-4.2.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/scala-lang/scala-library/2.11.5/scala-library-2.11.5.jar" sourcepath="M2_REPO/org/scala-lang/scala-library/2.11.5/scala-library-2.11.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/clearnlp/clearnlp/2.0.2/clearnlp-2.0.2.jar" sourcepath="M2_REPO/com/clearnlp/clearnlp/2.0.2/clearnlp-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/args4j/args4j/2.0.23/args4j-2.0.23.jar" sourcepath="M2_REPO/args4j/args4j/2.0.23/args4j-2.0.23-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17.jar" sourcepath="M2_REPO/log4j/log4j/1.2.17/log4j-1.2.17-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/jregex/jregex/1.2_01/jregex-1.2_01.jar" sourcepath="M2_REPO/net/sourceforge/jregex/jregex/1.2_01/jregex-1.2_01-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/carrotsearch/hppc/0.5.2/hppc-0.5.2.jar" sourcepath="M2_REPO/com/carrotsearch/hppc/0.5.2/hppc-0.5.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/clearnlp/clearnlp-dictionary/1.0/clearnlp-dictionary-1.0.jar" sourcepath="M2_REPO/com/clearnlp/clearnlp-dictionary/1.0/clearnlp-dictionary-1.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/clearnlp/clearnlp-general-en-dep/1.2/clearnlp-general-en-dep-1.2.jar" sourcepath="M2_REPO/com/clearnlp/clearnlp-general-en-dep/1.2/clearnlp-general-en-dep-1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/clearnlp/clearnlp-general-en-pos/1.1/clearnlp-general-en-pos-1.1.jar" sourcepath="M2_REPO/com/clearnlp/clearnlp-general-en-pos/1.1/clearnlp-general-en-pos-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/clearnlp/clearnlp-general-en-srl/1.1/clearnlp-general-en-srl-1.1.jar" sourcepath="M2_REPO/com/clearnlp/clearnlp-general-en-srl/1.1/clearnlp-general-en-srl-1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/github/scopt/scopt_2.11/3.4.0/scopt_2.11-3.4.0.jar" sourcepath="M2_REPO/com/github/scopt/scopt_2.11/3.4.0/scopt_2.11-3.4.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/morpha-stemmer/1.0.5/morpha-stemmer-1.0.5.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/morpha-stemmer/1.0.5/morpha-stemmer-1.0.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/opennlp-chunk-models/1.5/opennlp-chunk-models-1.5.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/opennlp-chunk-models/1.5/opennlp-chunk-models-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/opennlp-postag-models/1.5/opennlp-postag-models-1.5.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/opennlp-postag-models/1.5/opennlp-postag-models-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/opennlp-tokenize-models/1.5/opennlp-tokenize-models-1.5.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/opennlp-tokenize-models/1.5/opennlp-tokenize-models-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/reverb-core/1.4.3/reverb-core-1.4.3.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/reverb-core/1.4.3/reverb-core-1.4.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/reverb-models/1.4.0/reverb-models-1.4.0.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/reverb-models/1.4.0/reverb-models-1.4.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/common-java/2.0.2/common-java-2.0.2.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/common-java/2.0.2/common-java-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/openregex/1.1.1/openregex-1.1.1.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/openregex/1.1.1/openregex-1.1.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-lang/commons-lang/2.6/commons-lang-2.6.jar" sourcepath="M2_REPO/commons-lang/commons-lang/2.6/commons-lang-2.6-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/commons-cli/commons-cli/1.2/commons-cli-1.2.jar" sourcepath="M2_REPO/commons-cli/commons-cli/1.2/commons-cli-1.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/cc/mallet/mallet/2.0.7/mallet-2.0.7.jar" sourcepath="M2_REPO/cc/mallet/mallet/2.0.7/mallet-2.0.7-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/junit/junit/4.12/junit-4.12.jar" sourcepath="M2_REPO/junit/junit/4.12/junit-4.12-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar" sourcepath="M2_REPO/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/beanshell/bsh/2.0b4/bsh-2.0b4.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sf/trove4j/trove4j/2.0.2/trove4j-2.0.2.jar" sourcepath="M2_REPO/net/sf/trove4j/trove4j/2.0.2/trove4j-2.0.2-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/matrix-toolkits-java/mtj/0.9.14/mtj-0.9.14.jar" sourcepath="M2_REPO/com/googlecode/matrix-toolkits-java/mtj/0.9.14/mtj-0.9.14-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/googlecode/netlib-java/netlib-java/0.9.3/netlib-java-0.9.3.jar" sourcepath="M2_REPO/com/googlecode/netlib-java/netlib-java/0.9.3/netlib-java-0.9.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/sourceforge/f2j/arpack_combined_all/0.1/arpack_combined_all-0.1.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/apache/opennlp/opennlp-maxent/3.0.3/opennlp-maxent-3.0.3.jar" sourcepath="M2_REPO/org/apache/opennlp/opennlp-maxent/3.0.3/opennlp-maxent-3.0.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/edu/washington/cs/knowitall/opennlp-sent-models/1.5/opennlp-sent-models-1.5.jar" sourcepath="M2_REPO/edu/washington/cs/knowitall/opennlp-sent-models/1.5/opennlp-sent-models-1.5-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/databinder/unfiltered-filter_2.11/0.7.1/unfiltered-filter_2.11-0.7.1.jar" sourcepath="M2_REPO/net/databinder/unfiltered-filter_2.11/0.7.1/unfiltered-filter_2.11-0.7.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/databinder/unfiltered_2.11/0.7.1/unfiltered_2.11-0.7.1.jar" sourcepath="M2_REPO/net/databinder/unfiltered_2.11/0.7.1/unfiltered_2.11-0.7.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/databinder/unfiltered-util_2.11/0.7.1/unfiltered-util_2.11-0.7.1.jar" sourcepath="M2_REPO/net/databinder/unfiltered-util_2.11/0.7.1/unfiltered-util_2.11-0.7.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/scala-lang/modules/scala-xml_2.11/1.0.1/scala-xml_2.11-1.0.1.jar" sourcepath="M2_REPO/org/scala-lang/modules/scala-xml_2.11/1.0.1/scala-xml_2.11-1.0.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/databinder/unfiltered-jetty_2.11/0.7.1/unfiltered-jetty_2.11-0.7.1.jar" sourcepath="M2_REPO/net/databinder/unfiltered-jetty_2.11/0.7.1/unfiltered-jetty_2.11-0.7.1-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-webapp/8.1.13.v20130916/jetty-webapp-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-webapp/8.1.13.v20130916/jetty-webapp-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-xml/8.1.13.v20130916/jetty-xml-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-xml/8.1.13.v20130916/jetty-xml-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-util/8.1.13.v20130916/jetty-util-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-util/8.1.13.v20130916/jetty-util-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-servlet/8.1.13.v20130916/jetty-servlet-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-servlet/8.1.13.v20130916/jetty-servlet-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-security/8.1.13.v20130916/jetty-security-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-security/8.1.13.v20130916/jetty-security-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-server/8.1.13.v20130916/jetty-server-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-server/8.1.13.v20130916/jetty-server-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/orbit/javax.servlet/3.0.0.v201112011016/javax.servlet-3.0.0.v201112011016.jar" sourcepath="M2_REPO/org/eclipse/jetty/orbit/javax.servlet/3.0.0.v201112011016/javax.servlet-3.0.0.v201112011016-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-continuation/8.1.13.v20130916/jetty-continuation-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-continuation/8.1.13.v20130916/jetty-continuation-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-http/8.1.13.v20130916/jetty-http-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-http/8.1.13.v20130916/jetty-http-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/eclipse/jetty/jetty-io/8.1.13.v20130916/jetty-io-8.1.13.v20130916.jar" sourcepath="M2_REPO/org/eclipse/jetty/jetty-io/8.1.13.v20130916/jetty-io-8.1.13.v20130916-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/net/databinder/dispatch/dispatch-core_2.11/0.11.0/dispatch-core_2.11-0.11.0.jar" sourcepath="M2_REPO/net/databinder/dispatch/dispatch-core_2.11/0.11.0/dispatch-core_2.11-0.11.0-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/com/ning/async-http-client/1.7.16/async-http-client-1.7.16.jar" sourcepath="M2_REPO/com/ning/async-http-client/1.7.16/async-http-client-1.7.16-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/io/netty/netty/3.6.3.Final/netty-3.6.3.Final.jar" sourcepath="M2_REPO/io/netty/netty/3.6.3.Final/netty-3.6.3.Final-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/allenai/openregex/openregex-scala_2.11/1.1.3/openregex-scala_2.11-1.1.3.jar" sourcepath="M2_REPO/org/allenai/openregex/openregex-scala_2.11/1.1.3/openregex-scala_2.11-1.1.3-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/scalaz/scalaz-core_2.11/7.0.9/scalaz-core_2.11-7.0.9.jar" sourcepath="M2_REPO/org/scalaz/scalaz-core_2.11/7.0.9/scalaz-core_2.11-7.0.9-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.4/scala-parser-combinators_2.11-1.0.4.jar" sourcepath="M2_REPO/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.4/scala-parser-combinators_2.11-1.0.4-sources.jar"/>
+  <classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28.jar" sourcepath="M2_REPO/org/slf4j/slf4j-log4j12/1.7.28/slf4j-log4j12-1.7.28-sources.jar"/>
+</classpath>
\ No newline at end of file
diff --git a/openie/.project b/openie/.project
new file mode 100755
index 0000000..fb384c8
--- /dev/null
+++ b/openie/.project
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+  <name>apache-any23-openie</name>
+  <comment>Open Information Extraction module. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse.</comment>
+  <projects>
+    <project>apache-any23-core</project>
+    <project>apache-any23-api</project>
+    <project>apache-any23-csvutils</project>
+    <project>apache-any23-mime</project>
+    <project>apache-any23-encoding</project>
+    <project>apache-any23-test-resources</project>
+  </projects>
+  <buildSpec>
+    <buildCommand>
+      <name>org.eclipse.jdt.core.javabuilder</name>
+    </buildCommand>
+    <buildCommand>
+      <name>org.eclipse.m2e.core.maven2Builder</name>
+    </buildCommand>
+  </buildSpec>
+  <natures>
+    <nature>org.eclipse.jdt.core.javanature</nature>
+    <nature>org.eclipse.m2e.core.maven2Nature</nature>
+  </natures>
+</projectDescription>
\ No newline at end of file
diff --git a/openie/.settings/org.eclipse.core.resources.prefs b/openie/.settings/org.eclipse.core.resources.prefs
new file mode 100755
index 0000000..839d647
--- /dev/null
+++ b/openie/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding//src/test/java=UTF-8
+encoding/<project>=UTF-8
diff --git a/openie/.settings/org.eclipse.jdt.core.prefs b/openie/.settings/org.eclipse.jdt.core.prefs
new file mode 100755
index 0000000..b8947ec
--- /dev/null
+++ b/openie/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/openie/.settings/org.eclipse.m2e.core.prefs b/openie/.settings/org.eclipse.m2e.core.prefs
new file mode 100755
index 0000000..f897a7f
--- /dev/null
+++ b/openie/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/openie/pom.xml b/openie/pom.xml
new file mode 100644
index 0000000..35811f7
--- /dev/null
+++ b/openie/pom.xml
@@ -0,0 +1,164 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.4-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.any23.plugins</groupId>
+  <artifactId>apache-any23-openie</artifactId>
+
+  <name>Apache Any23 :: Plugins :: OpenIE</name>
+  <description>Open Information Extraction module.</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-test-resources</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>${openie_2.11.version}</version>
+      <scope>compile</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-classic</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-core</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>${openie_2.11.version}</version>
+      <scope>compile</scope>
+      <type>pom</type>
+      <exclusions>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-classic</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-core</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>edu.washington.cs.knowitall</groupId>
+      <artifactId>openregex</artifactId>
+      <version>${openregex.version}</version>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <skipTests>true</skipTests>
+        </configuration>
+      </plugin>
+      <!-- Generates the distribution package -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <configuration>
+          <appendAssemblyId>false</appendAssemblyId>
+          <descriptors>
+            <descriptor>${basedir}/src/main/assembly/bin.xml</descriptor>
+          </descriptors>
+        </configuration>
+      </plugin>
+    </plugins>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-assembly-plugin</artifactId>
+          <version>${maven-assembly-plugin.version}</version>
+          <executions>
+            <execution>
+              <id>assembly</id>
+              <phase>package</phase>
+              <goals>
+                <goal>single</goal>
+              </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <attach>true</attach>
+            <skipAssembly>true</skipAssembly>
+            <tarLongFileMode>gnu</tarLongFileMode>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource> <!-- copies the parent directory's license/notice files into apidocs/META-INF -->
+            <directory>${basedir}/../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE*</include> <!-- the repository root ships LICENSE.md; LICENSE.txt still matches -->
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+
+  </profiles>
+
+</project>
diff --git a/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
new file mode 100644
index 0000000..3992388
--- /dev/null
+++ b/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.plugin.extractor.openie;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.extractor.IssueReport;
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.plugin.Author;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+
+import edu.knowitall.openie.Argument;
+import edu.knowitall.openie.Instance;
+import edu.knowitall.openie.OpenIE;
+import edu.knowitall.tool.parse.ClearParser;
+import edu.knowitall.tool.postag.ClearPostagger;
+import edu.knowitall.tool.srl.ClearSrl;
+import edu.knowitall.tool.tokenize.ClearTokenizer;
+import scala.collection.JavaConversions;
+import scala.collection.Seq;
+
+/**
+ * An <a href="https://github.com/allenai/openie-standalone">OpenIE</a> extractor able to generate
+ * <i>RDF</i> statements from sentences representing relations in the text.
+ */
+@Author(name="Lewis John McGibbney (lewismc@apache.org)")
+public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
+
+    private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
+
+    /** Extraction parameter naming the confidence an extraction must exceed to be written. */
+    private static final String THRESHOLD_PROPERTY = "any23.extraction.openie.confidence.threshold";
+    /** Threshold applied when the parameter cannot be read or is not a valid number. */
+    private static final double DEFAULT_THRESHOLD = 0.5;
+
+    /** Default constructor. */
+    public OpenIEExtractor() {
+    }
+
+    /** @see org.apache.any23.extractor.Extractor#getDescription() */
+    @Override
+    public ExtractorDescription getDescription() {
+        return OpenIEExtractorFactory.getDescriptionInstance();
+    }
+
+    /**
+     * Converts {@code in} to text with Tika, runs OpenIE over it and writes one triple per
+     * (arg1, rel, arg2) of each extraction whose confidence exceeds the threshold. Reports a
+     * fatal issue and writes no triples if free heap is short or the text cannot be obtained.
+     */
+    @Override
+    public void run(ExtractionParameters extractionParameters,
+            ExtractionContext context, Document in, ExtractionResult out)
+                    throws IOException, ExtractionException {
+        Runtime runtime = Runtime.getRuntime();
+        long maxMemory = runtime.maxMemory();
+        //free up as much memory as possible before performing this calculation
+        runtime.gc();
+        long usedMemory = Math.max(0L, runtime.totalMemory() - runtime.freeMemory());
+        long availableMemory = maxMemory - usedMemory;
+        if (availableMemory < 4294967296L) {
+            out.notifyIssue(IssueReport.IssueLevel.FATAL,
+                    "Not enough heap space available to perform OpenIE extraction: "
+                            + (availableMemory/1048576L) + "/" + (maxMemory / 1048576L)
+                            + " MB. Requires 4096 MB.", -1, -1);
+            LOG.error("Increase JVM heap size when running OpenIE extractor. max={}; available={}", maxMemory, availableMemory);
+            return;
+        }
+
+        IRI documentIRI = context.getDocumentIRI();
+        out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
+        out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
+        LOG.debug("Processing: {}", documentIRI);
+
+        OpenIE openIE = new OpenIE(
+                new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false);
+
+        Seq<Instance> extractions;
+        try {
+            extractions = openIE.extract(new Tika().parseToString(StreamUtils.documentToInputStream(in)));
+        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError | TikaException e) {
+            // No text means no extractions: report and stop rather than iterate a null Seq.
+            LOG.error("Encountered error whilst converting the document to text.", e);
+            out.notifyIssue(IssueReport.IssueLevel.FATAL, "Unable to obtain text for OpenIE extraction: " + e, -1, -1);
+            return;
+        }
+
+        double threshold = confidenceThreshold(extractionParameters);
+        // confidence() rates the extraction; arg1 is the subject, rel the predicate, arg2s the objects
+        List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);
+        for (Instance instance : listExtractions) {
+            if (instance.confidence() > threshold) {
+                List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
+                for (Argument argument : listArg2s) {
+                    Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);
+                    IRI predicate = (IRI) RDFUtils.makeIRI(instance.extr().rel().text(), documentIRI);
+                    Value object = RDFUtils.toValue(argument.text());
+                    out.writeTriple(subject, predicate, object);
+                }
+            }
+        }
+    }
+
+    /** Reads the confidence threshold; an unreadable or malformed value yields the default. */
+    private static double confidenceThreshold(ExtractionParameters extractionParameters) {
+        try {
+            String value = extractionParameters.getProperty(THRESHOLD_PROPERTY);
+            return value == null ? DEFAULT_THRESHOLD : Double.parseDouble(value);
+        } catch (RuntimeException e) {
+            LOG.debug("Using default OpenIE confidence threshold {}", DEFAULT_THRESHOLD, e);
+            return DEFAULT_THRESHOLD;
+        }
+    }
+}
diff --git a/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java b/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java
new file mode 100644
index 0000000..1c86c62
--- /dev/null
+++ b/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractorFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.plugin.extractor.openie;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * Factory for {@link OpenIEExtractor}, named {@value #NAME}.
+ * @author lewismc
+ */
+public class OpenIEExtractorFactory extends SimpleExtractorFactory<OpenIEExtractor>
+        implements ExtractorFactory<OpenIEExtractor> {
+
+    /** Extractor name handed to the parent factory. */
+    public static final String NAME = "openie";
+    /** Prefixes handed to the parent factory; none are defined. */
+    public static final Prefixes prefixes = null;
+    private static final ExtractorDescription SHARED_DESCRIPTION = new OpenIEExtractorFactory();
+
+    public OpenIEExtractorFactory() {
+        super(NAME, prefixes, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), "example-openie.html");
+    }
+
+    @Override
+    public OpenIEExtractor createExtractor() {
+        return new OpenIEExtractor();
+    }
+
+    /** Returns the shared factory instance as an {@link ExtractorDescription}. */
+    public static ExtractorDescription getDescriptionInstance() {
+        return SHARED_DESCRIPTION;
+    }
+}
diff --git a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
new file mode 100644
index 0000000..10ebf16
--- /dev/null
+++ b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -0,0 +1 @@
+org.apache.any23.plugin.extractor.openie.OpenIEExtractorFactory
\ No newline at end of file
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
new file mode 100644
index 0000000..dcc4e8f
--- /dev/null
+++ b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractionResultImpl;
+import org.apache.any23.plugin.extractor.openie.OpenIEExtractor;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.any23.writer.RDFXMLWriter;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.eclipse.rdf4j.model.IRI;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Smoke test that runs {@link OpenIEExtractor} over a bundled HTML page.
+ * @author lewismc
+ */
+public class OpenIEExtractorTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(OpenIEExtractorTest.class);
+
+    private OpenIEExtractor extractor;
+
+    @Before
+    public void setUp() throws Exception {
+        extractor = new OpenIEExtractor();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        extractor = null;
+    }
+
+    @Test
+    public void testExtractFromHTMLDocument() throws IOException, ExtractionException, TripleHandlerException {
+        final IRI uri = RDFUtils.iri("http://podaac.jpl.nasa.gov/aquarius");
+        extract(uri, "/org/apache/any23/extractor/openie/example-openie.html");
+    }
+
+    /** Runs the extractor on classpath resource {@code filePath}, writing RDF/XML to a temp file removed at JVM exit. */
+    public void extract(IRI uri, String filePath) throws IOException, ExtractionException, TripleHandlerException {
+        final File output = File.createTempFile("OpenIEExtractorTest", ".tmp");
+        output.deleteOnExit(); // avoid leaving one stray file in the temp directory per test run
+        final ExtractionContext extractionContext = new ExtractionContext("rdf-openie", uri);
+        try (FileOutputStream fos = new FileOutputStream(output)) { // stream is closed even if extraction throws
+            final TripleHandler tHandler = new RDFXMLWriter(fos);
+            final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
+            try {
+                extractor.run(ExtractionParameters.newDefault(), extractionContext,
+                        StreamUtils.inputStreamToDocument(this.getClass().getResourceAsStream(filePath)), result);
+                logger.debug("RDF/XML output written to {}", output);
+            } finally {
+                try {
+                    tHandler.close();
+                } finally {
+                    result.close(); // still release the result when closing the handler fails
+                }
+            }
+        }
+    }
+}
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..4a3719e
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,1170 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>21</version>
+  </parent>
+
+  <groupId>org.apache.any23</groupId>
+  <artifactId>apache-any23-plugins</artifactId>
+  <version>2.4-SNAPSHOT</version>
+  <packaging>pom</packaging>
+
+  <name>Apache Any23 Plugins</name>
+  <description>Anything To Triples (any23) is a library, a web service and a command line tool that 
+  extracts structured data in RDF format from a variety of Web documents.</description>
+  <url>http://any23.apache.org</url>
+  <inceptionYear>2010</inceptionYear>
+
+  <repositories>
+    <repository>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+      <id>bintray-allenai-maven</id>
+      <name>bintray</name>
+      <url>https://allenai.bintray.com/maven</url> <!-- HTTPS: artifacts must not be fetched over plain HTTP -->
+    </repository>
+  </repositories>
+  <pluginRepositories>
+    <pluginRepository>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+      <id>bintray-allenai-maven</id>
+      <name>bintray-plugins</name>
+      <url>https://allenai.bintray.com/maven</url> <!-- HTTPS: build plugins must not be fetched over plain HTTP -->
+    </pluginRepository>
+  </pluginRepositories>
+
+  <developers>
+    <developer>
+      <id>hansbrende</id>
+      <name>Hans Brende</name>
+      <email>hansbrende[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>band</id>
+      <name>Bill Anderson</name>
+      <email>band[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>ansell</id>
+      <name>Peter Ansell</name>
+      <email>ansell[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>dpalmisano</id>
+      <name>Davide Palmisano</name>
+      <email>dpalmisano[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>giovanni</id>
+      <name>Giovanni Tummarello</name>
+      <email>giovanni[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>lewismc</id>
+      <name>Lewis John McGibbney</name>
+      <email>lewismc[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>mattmann</id>
+      <name>Chris Mattmann</name>
+      <email>mattmann[at]apache[dot]org</email>
+      <roles>
+        <role>Champion</role>
+        <role>Committer</role>
+        <role>PMC Member</role>
+        <role>Mentor</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>mostarda</id>
+      <name>Michele Mostarda</name>
+      <email>mostarda[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>niq</id>
+      <name>Nick Kew</name>
+      <email>niq[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+        <role>Mentor</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>pramirez</id>
+      <name>Paul Michael Ramirez</name>
+      <email>pramirez[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+        <role>Mentor</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>reto</id>
+      <name>Reto Bachmann-Gmür</name>
+      <email>reto[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>simonetripodi</id>
+      <name>Simone Tripodi</name>
+      <email>simonetripodi[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+        <role>Mentor</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>szydan</id>
+      <name>Szymon Danielczyk</name>
+      <email>szydan[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+      </roles>
+    </developer>
+    <developer>
+      <id>tommaso</id>
+      <name>Tommaso Teofili</name>
+      <email>tommaso[at]apache[dot]org</email>
+      <roles>
+        <role>Committer</role>
+        <role>PMC Member</role>
+        <role>Mentor</role>
+      </roles>
+    </developer>
+  </developers>
+
+  <mailingLists>
+    <mailingList>
+      <name>Dev Mailing List</name>
+      <post>dev[at]any23[dot]apache[dot]org</post>
+      <subscribe>dev-subscribe[at]any23[dot]apache[dot]org</subscribe>
+      <unsubscribe>dev-unsubscribe[at]any23[dot]apache[dot]org</unsubscribe>
+      <archive>http://mail-archives.apache.org/mod_mbox/any23-dev/</archive>
+      <otherArchives>
+        <otherArchive>http://any23-dev.markmail.org/</otherArchive>
+      </otherArchives>
+    </mailingList>
+
+    <mailingList>
+      <name>User Mailing List</name>
+      <post>user[at]any23[dot]apache[dot]org</post>
+      <subscribe>user-subscribe[at]any23[dot]apache[dot]org</subscribe>
+      <unsubscribe>user-unsubscribe[at]any23[dot]apache[dot]org</unsubscribe>
+      <archive>http://mail-archives.apache.org/mod_mbox/any23-user/</archive>
+      <otherArchives>
+        <otherArchive>http://any23-user.markmail.org/</otherArchive>
+      </otherArchives>
+    </mailingList>
+
+    <mailingList>
+      <name>Commits Mailing List</name>
+      <post>commits[at]any23[dot]apache[dot]org</post>
+      <subscribe>commits-subscribe[at]any23[dot]apache[dot]org</subscribe>
+      <unsubscribe>commits-unsubscribe[at]any23[dot]apache[dot]org</unsubscribe>
+      <archive>http://mail-archives.apache.org/mod_mbox/any23-commits/</archive>
+      <otherArchives>
+        <otherArchive>http://any23-commits.markmail.org/</otherArchive>
+      </otherArchives>
+    </mailingList>
+  </mailingLists>
+
+  <modules>
+    <module>basic-crawler</module>
+    <module>html-scraper</module>
+    <module>office-scraper</module>
+    <module>openie</module>
+    <!--module>plugins/integration-test</module-->
+  </modules>
+
+  <scm>
+    <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/any23-plugins.git</developerConnection>
+    <connection>scm:git:https://gitbox.apache.org/repos/asf/any23-plugins.git</connection> <!-- same HTTPS endpoint as developerConnection -->
+    <url>https://gitbox.apache.org/repos/asf/any23-plugins.git</url>
+    <tag>HEAD</tag>
+  </scm>
+  <issueManagement>
+    <system>JIRA</system>
+    <url>https://issues.apache.org/jira/browse/ANY23</url>
+  </issueManagement>
+  <ciManagement>
+    <system>Jenkins</system>
+    <url>https://builds.apache.org/job/Any23-plugins/</url>
+  </ciManagement>
+
+  <!-- General properties. -->
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.build.resourceEncoding>UTF-8</project.build.resourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+
+    <javac.src.version>1.8</javac.src.version>
+    <javac.target.version>1.8</javac.target.version>
+
+    <maven.build.timestamp.format>yyyy-MM-dd HH:mm:ssZ</maven.build.timestamp.format>
+    <implementation.build>${scmBranch}@r${buildNumber}</implementation.build>
+    <implementation.build.tstamp>${maven.build.timestamp}</implementation.build.tstamp>
+
+    <httpclient.version>4.5.10</httpclient.version>
+    <httpcore.version>4.4.12</httpcore.version>
+    <owlapi.version>5.1.11</owlapi.version>
+    <poi.version>4.1.0</poi.version>
+    <rdf4j.version>3.0.0</rdf4j.version>
+    <semargl.version>0.7</semargl.version>
+    <slf4j.logger.version>1.7.28</slf4j.logger.version>
+    <tika.version>1.22</tika.version>
+    <openie_2.11.version>4.2.6</openie_2.11.version>
+    <openregex.version>1.1.1</openregex.version>
+    <jackson.version>2.9.10</jackson.version>
+
+    <!-- Overridden in profiles to add JDK specific arguments to surefire -->
+    <surefire-extra-args />
+
+    <!-- Used to track API changes based on Semantic Versioning
+         NOTE: velocity does.not.allow.dot.notation, so justUseCamelCase -->
+    <latestStableRelease>2.3</latestStableRelease>
+
+    <!-- Maven Plugin Versions -->
+    <maven-javadoc-plugin.version>3.1.1</maven-javadoc-plugin.version>
+    <maven-clean-plugin.version>3.1.0</maven-clean-plugin.version>
+    <maven-deploy-plugin.version>3.0.0-M1</maven-deploy-plugin.version>
+    <maven-install-plugin.version>3.0.0-M1</maven-install-plugin.version>
+    <maven-resources-plugin.version>3.1.0</maven-resources-plugin.version>
+    <maven-assembly-plugin.version>3.1.1</maven-assembly-plugin.version>
+    <appassembler-maven-plugin.version>2.1.0</appassembler-maven-plugin.version>
+    <maven-release-plugin.version>2.5.3</maven-release-plugin.version>
+    <buildnumber-maven-plugin.version>1.4</buildnumber-maven-plugin.version>
+    <maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
+    <maven-jar-plugin.version>3.1.2</maven-jar-plugin.version>
+    <maven-surefire-plugin.version>3.0.0-M3</maven-surefire-plugin.version>
+    <jacoco-maven-plugin.version>0.8.4</jacoco-maven-plugin.version>
+    <maven-site-plugin.version>3.7.1</maven-site-plugin.version>
+    <maven-changes-plugin.version>2.12.1</maven-changes-plugin.version>
+    <maven-project-info-reports-plugin.version>3.0.0</maven-project-info-reports-plugin.version>
+    <maven-jxr-plugin.version>3.0.0</maven-jxr-plugin.version>
+    <maven-checkstyle-plugin.version>3.1.0</maven-checkstyle-plugin.version>
+    <apache-rat-plugin.version>0.13</apache-rat-plugin.version>
+    <maven-source-plugin.version>3.0.1</maven-source-plugin.version>
+    <maven-gpg-plugin.version>1.6</maven-gpg-plugin.version>
+    <maven-war-plugin.version>3.2.3</maven-war-plugin.version>
+    <maven-invoker-plugin.version>3.2.1</maven-invoker-plugin.version>
+    <maven-checkstyle-plugin.version>3.1.12.2</maven-checkstyle-plugin.version> <!-- NOTE(review): second definition of this property (3.1.0 is declared a few lines above); this value is what spotbugs-maven-plugin reads as its version further down, so it was presumably meant to be a separate spotbugs-maven-plugin.version property - confirm -->
+
+    <!--
+     | Any23 website has to be stored in SVN
+    -->
+    <site.deploymentBaseUrl>scm:svn:https://svn.apache.org/repos/asf/any23/site</site.deploymentBaseUrl>
+    <site.filePath>${project.basedir}/any23-site/</site.filePath>
+    <site.urlDeployment>file://${site.filePath}</site.urlDeployment>
+    <site.scmPubCheckoutDirectory>${site.filePath}</site.scmPubCheckoutDirectory>
+    <assembly.skip>false</assembly.skip>
+  </properties>
+
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-lang3</artifactId>
+        <version>3.9</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpclient</artifactId>
+        <version>${httpclient.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpclient-cache</artifactId>
+        <version>${httpclient.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpmime</artifactId>
+        <version>${httpclient.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpcore</artifactId>
+        <version>${httpcore.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-codec</groupId>
+        <artifactId>commons-codec</artifactId>
+        <version>1.11</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-compress</artifactId>
+        <version>1.18</version>
+      </dependency>
+      <dependency>
+        <groupId>xerces</groupId>
+        <artifactId>xercesImpl</artifactId>
+        <version>2.12.0</version>
+      </dependency>
+      <dependency>
+        <groupId>org.jsoup</groupId>
+        <artifactId>jsoup</artifactId>
+        <version>1.12.1</version>
+      </dependency>
+      <dependency>
+        <groupId>net.sf.biweekly</groupId>
+        <artifactId>biweekly</artifactId>
+        <version>0.6.3</version>
+      </dependency>
+
+      <!-- BEGIN: Tika -->
+      <dependency>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-core</artifactId>
+        <version>${tika.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parsers</artifactId>
+        <version>${tika.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.poi</groupId>
+        <artifactId>poi</artifactId>
+        <version>${poi.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.poi</groupId>
+        <artifactId>poi-ooxml</artifactId>
+        <version>${poi.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.poi</groupId>
+        <artifactId>poi-scratchpad</artifactId>
+        <version>${poi.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-core</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-databind</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-annotations</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+      <!-- END: Tika -->
+
+      <!-- BEGIN: RDF4J -->
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-model</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-api</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-languages</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-datatypes</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-binary</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-turtle</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-rdfxml</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-ntriples</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-nquads</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-n3</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-trix</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-rdfjson</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-trig</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-rio-jsonld</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-repository-sail</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-sail-memory</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.eclipse.rdf4j</groupId>
+        <artifactId>rdf4j-repository-api</artifactId>
+        <version>${rdf4j.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>com.github.jsonld-java</groupId>
+        <artifactId>jsonld-java</artifactId>
+        <version>0.12.5</version>
+      </dependency>
+      <dependency>
+        <groupId>org.semarglproject</groupId>
+        <artifactId>semargl-rdf4j</artifactId>
+        <version>${semargl.version}</version>
+      </dependency>
+      <!-- END: RDF4J -->
+
+      <!-- BEGIN: OWLAPI -->
+      <dependency>
+        <groupId>net.sourceforge.owlapi</groupId>
+        <artifactId>owlapi-api</artifactId>
+        <version>${owlapi.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>net.sourceforge.owlapi</groupId>
+        <artifactId>owlapi-rio</artifactId>
+        <version>${owlapi.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>net.sourceforge.owlapi</groupId>
+        <artifactId>owlapi-parsers</artifactId>
+        <version>${owlapi.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>net.sourceforge.owlapi</groupId>
+        <artifactId>owlapi-apibinding</artifactId>
+        <version>${owlapi.version}</version>
+      </dependency>
+      <!-- END: OWLAPI -->
+
+      <!-- BEGIN:  Apache Commons -->
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-csv</artifactId>
+        <version>1.6</version>
+      </dependency>
+      <dependency>
+        <groupId>commons-io</groupId>
+        <artifactId>commons-io</artifactId>
+        <version>2.6</version>
+      </dependency>
+      <!-- END:  Apache Commons -->
+
+      <!-- BEGIN: Plugins specific dependencies -->
+      <dependency>
+        <groupId>de.l3s.boilerpipe</groupId>
+        <artifactId>boilerpipe</artifactId>
+        <version>1.1.0</version>
+      </dependency>
+      <!-- END: Plugins specific dependencies -->
+
+      <dependency>
+        <groupId>com.beust</groupId>
+        <artifactId>jcommander</artifactId>
+        <version>1.72</version>
+      </dependency>
+
+      <!-- BEGIN: logger -->
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-api</artifactId>
+        <version>${slf4j.logger.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>jcl-over-slf4j</artifactId>
+        <version>${slf4j.logger.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>jul-to-slf4j</artifactId>
+        <version>${slf4j.logger.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-log4j12</artifactId>
+        <version>${slf4j.logger.version}</version>
+      </dependency>
+      <!-- END: logger -->
+
+      <dependency>
+        <groupId>org.yaml</groupId>
+        <artifactId>snakeyaml</artifactId>
+        <version>1.23</version>
+      </dependency>
+
+      <!-- BEGIN: Test Dependencies -->
+      <dependency>
+        <groupId>junit</groupId>
+        <artifactId>junit</artifactId>
+        <version>4.12</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.mockito</groupId>
+        <artifactId>mockito-core</artifactId>
+        <version>3.0.0</version>
+        <scope>test</scope>
+      </dependency>
+      <!-- END: Test Dependencies -->
+    </dependencies>
+  </dependencyManagement>
+
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-clean-plugin</artifactId>
+          <version>${maven-clean-plugin.version}</version>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-war-plugin</artifactId>
+          <version>${maven-war-plugin.version}</version>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-invoker-plugin</artifactId>
+          <version>${maven-invoker-plugin.version}</version>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-deploy-plugin</artifactId>
+          <version>${maven-deploy-plugin.version}</version>
+          <inherited>true</inherited>
+          <configuration>
+            <updateReleaseInfo>true</updateReleaseInfo>
+          </configuration>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-install-plugin</artifactId>
+          <version>${maven-install-plugin.version}</version>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-resources-plugin</artifactId>
+          <version>${maven-resources-plugin.version}</version>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-assembly-plugin</artifactId>
+          <version>${maven-assembly-plugin.version}</version>
+          <executions>
+            <execution>
+              <id>assembly</id>
+              <phase>package</phase>
+              <goals>
+                <goal>single</goal>
+              </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <attach>true</attach>
+            <skipAssembly>${assembly.skip}</skipAssembly>
+            <tarLongFileMode>posix</tarLongFileMode>
+          </configuration>
+        </plugin>
+
+        <plugin>
+          <groupId>org.codehaus.mojo</groupId>
+          <artifactId>appassembler-maven-plugin</artifactId>
+          <version>${appassembler-maven-plugin.version}</version>
+          <configuration>
+            <repositoryLayout>flat</repositoryLayout>
+            <repositoryName>lib</repositoryName>
+            <extraJvmArguments>-Xms500m -Xmx500m -XX:-UseGCOverheadLimit</extraJvmArguments> <!-- no -XX:PermSize: PermGen was removed in Java 8 and newer JVMs reject the flag -->
+          </configuration>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-release-plugin</artifactId>
+          <version>${maven-release-plugin.version}</version>
+          <configuration>
+            <preparationGoals>install</preparationGoals>
+            <mavenExecutorId>forked-path</mavenExecutorId>
+            <useReleaseProfile>false</useReleaseProfile>
+            <autoVersionSubmodules>false</autoVersionSubmodules>
+            <tagNameFormat>any23-@{project.version}</tagNameFormat>
+            <arguments>-Dany23.online.test.disabled=true -Prelease,apache</arguments>
+          </configuration>
+          <dependencies>
+            <dependency>
+              <groupId>org.apache.maven.scm</groupId>
+              <artifactId>maven-scm-provider-gitexe</artifactId>
+              <version>1.9</version>
+            </dependency>
+          </dependencies>
+        </plugin>
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-enforcer-plugin</artifactId>
+          <version>3.0.0-M2</version>
+          <executions>
+            <execution>
+              <id>enforce-maven</id>
+              <goals>
+                <goal>enforce</goal>
+              </goals>
+              <configuration>
+                <rules>
+                  <requireMavenVersion>
+                    <version>3.5.0</version>
+                  </requireMavenVersion>
+                </rules>    
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
+
+      </plugins>
+    </pluginManagement>
+
+    <plugins>
+
+      <!-- Javadoc plugin. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <version>${maven-javadoc-plugin.version}</version>
+        <configuration>
+          <!--
+           | Apple's JVM sometimes requires more memory
+          -->
+          <additionalJOption>-J-Xmx1024m</additionalJOption>
+          <source>8</source>
+        </configuration>
+      </plugin>
+
+      <!-- Drop inherited behavior (i.e. don't put any more default LICENSE and NOTICE files in all artifacts) -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-remote-resources-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>process</goal>
+            </goals>
+            <phase>none</phase>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>buildnumber-maven-plugin</artifactId>
+        <version>${buildnumber-maven-plugin.version}</version>
+        <executions>
+          <execution>
+            <phase>validate</phase>
+            <goals>
+              <goal>create</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <doCheck>false</doCheck>
+          <doUpdate>false</doUpdate>
+          <!-- Use committed revision so it does not change every time svn update is run -->
+          <useLastCommittedRevision>true</useLastCommittedRevision>
+          <!-- default revision number if unavailable -->
+          <revisionOnScmFailure>??????</revisionOnScmFailure>
+        </configuration>
+      </plugin>
+
+      <!-- Compiler configuration. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>${maven-compiler-plugin.version}</version>
+        <configuration>
+          <source>${javac.src.version}</source>
+          <target>${javac.target.version}</target>
+        </configuration>
+      </plugin>
+
+      <!-- Jar packaging: adds build and compiler information to the manifest. -->
+      <plugin>
+        <!-- groupId stated explicitly, like every other plugin declaration in this POM -->
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <version>${maven-jar-plugin.version}</version>
+        <configuration>
+          <archive>
+            <manifest>
+              <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+              <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+            </manifest>
+            <!-- Extra manifest attributes, filled in from build properties -->
+            <manifestEntries>
+              <Implementation-Build>${implementation.build}</Implementation-Build>
+              <Implementation-Build-Date>${maven.build.timestamp}</Implementation-Build-Date>
+              <X-Compile-Source-JDK>${javac.src.version}</X-Compile-Source-JDK>
+              <X-Compile-Target-JDK>${javac.target.version}</X-Compile-Target-JDK>
+            </manifestEntries>
+          </archive>
+        </configuration>
+      </plugin>
+
+      <!-- Test runner configuration. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>${maven-surefire-plugin.version}</version>
+        <configuration>
+          <!--
+            @{argLine} is evaluated by Surefire when the test JVM is started, so the
+            agent options that the jacoco-maven-plugin prepare-agent goal stores in
+            the argLine property are kept. An argLine configured here without it
+            replaces that property and the coverage agent is never attached.
+            Declaring default file encoding: UTF-8
+            Enabling assertions.
+           -->
+          <argLine>@{argLine} -Dfile.encoding=UTF-8 -ea ${surefire-extra-args}</argLine>
+          <!-- Only classes following these naming patterns are run as tests -->
+          <includes>
+            <include>**/*Test.java</include>
+            <include>**/*TestCase.java</include>
+          </includes>
+        </configuration>
+      </plugin>
+
+      <!-- Test coverage: runs the JaCoCo prepare-agent goal (no phase is given, so the goal's default binding applies). -->
+      <plugin>
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+        <version>${jacoco-maven-plugin.version}</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>prepare-agent</goal>
+            </goals>
+            <id>prepare-agent</id>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!--
+        SpotBugs static analysis.
+        NOTE(review): the version is taken from the Checkstyle plugin's version
+        property rather than a SpotBugs-specific one. The two plugins are
+        released independently; confirm this is intentional.
+      -->
+      <plugin>
+        <groupId>com.github.spotbugs</groupId>
+        <artifactId>spotbugs-maven-plugin</artifactId>
+        <version>${maven-checkstyle-plugin.version}</version>
+      </plugin>
+    </plugins>
+  </build>
+
+  <reporting>
+    <plugins>
+      <!-- Project information reports; the report set below lists the reports that are configured. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-project-info-reports-plugin</artifactId>
+        <version>${maven-project-info-reports-plugin.version}</version>
+        <reportSets>
+          <reportSet>
+            <reports>
+              <report>ci-management</report>
+              <report>dependencies</report>
+              <report>dependency-convergence</report>
+              <report>dependency-info</report>
+              <report>dependency-management</report>
+              <report>distribution-management</report>
+              <report>index</report>
+              <report>issue-management</report>
+              <report>licenses</report>
+              <report>mailing-lists</report>
+              <report>modules</report>
+              <report>plugin-management</report>
+              <report>plugins</report>
+              <report>scm</report>
+              <report>summary</report>
+              <report>team</report>
+            </reports>
+          </reportSet>
+        </reportSets>
+      </plugin>
+
+      <!-- API documentation (Javadoc) report. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <version>${maven-javadoc-plugin.version}</version>
+        <configuration>
+          <source>8</source>
+          <!--
+           | Raises the maximum heap of the javadoc JVM to 1024 MB;
+           | Apple's JVM sometimes requires more memory.
+          -->
+          <additionalJOption>-J-Xmx1024m</additionalJOption>
+        </configuration>
+      </plugin>
+
+      <!-- Browsable source cross-reference (JXR), with the aggregate option enabled. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jxr-plugin</artifactId>
+        <version>${maven-jxr-plugin.version}</version>
+        <configuration>
+          <aggregate>true</aggregate>
+        </configuration>
+      </plugin>
+
+      <!-- Code-coverage report: only the JaCoCo report-aggregate report is configured. -->
+      <plugin>
+        <groupId>org.jacoco</groupId>
+        <artifactId>jacoco-maven-plugin</artifactId>
+        <version>${jacoco-maven-plugin.version}</version>
+        <reportSets>
+          <reportSet>
+            <reports>
+              <report>report-aggregate</report>
+            </reports>
+          </reportSet>
+        </reportSets>
+      </plugin>
+
+      <!--
+        SpotBugs report.
+        NOTE(review): the version is taken from the Checkstyle plugin's version
+        property rather than a SpotBugs-specific one. The two plugins are
+        released independently; confirm this is intentional.
+      -->
+      <plugin>
+        <groupId>com.github.spotbugs</groupId>
+        <artifactId>spotbugs-maven-plugin</artifactId>
+        <version>${maven-checkstyle-plugin.version}</version>
+      </plugin>
+
+      <!-- Checkstyle report. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>${maven-checkstyle-plugin.version}</version>
+      </plugin>
+    </plugins>
+  </reporting>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <plugins>
+          <!--plugin>
+            <groupId>org.apache.rat</groupId>
+            <artifactId>apache-rat-plugin</artifactId>
+            <version>${apache-rat-plugin.version}</version>
+            <executions>
+              <execution>
+                <id>rat-verify</id>
+                <phase>test</phase>
+                <goals>
+                  <goal>check</goal>
+                </goals>
+                <configuration>
+                  <excludes>
+                    <exclude>**/.gitignore</exclude>
+                    <exclude>**/any23-gcode-CHANGES.txt</exclude>
+                    <exclude>**/NOTICE-*.txt</exclude>
+                    <exclude>LICENSE.txt</exclude>
+                    <exclude>NOTICE.txt</exclude>
+                    <exclude>**/README.txt</exclude>
+                    <exclude>RELEASE-NOTES.txt</exclude>
+                    <exclude>*.bat</exclude>
+                    <exclude>*.sh</exclude>
+                    <exclude>**/*.csv</exclude>
+                    <exclude>**/*.json</exclude>
+                    <exclude>**/*.xls*</exclude>
+                    <exclude>**/*zip/*</exclude>
+                    <exclude>**/error.wsdl</exclude>
+                    <exclude>.idea/**</exclude>
+                    <exclude>.classpath</exclude>
+                    <exclude>.project</exclude>
+                    <exclude>.settings</exclude>
+                    <exclude>maven-eclipse.xml</exclude>
+                    <exclude>.externalToolBuilders/**</exclude>
+                    <exclude>.git/**</exclude>
+                    < TODO restore them once ANY23-98 is fixed >
+                    <exclude>**/application/nquads/test1.nq</exclude>
+                    <exclude>**/application/nquads/test2.nq</exclude>
+                    <exclude>**/application/rdfn3/test1</exclude>
+                    <exclude>**/application/rdfn3/test2</exclude>
+                    <exclude>**/application/rdfn3/test3</exclude>
+                    <exclude>**/application/rss1/test1</exclude>
+                    <exclude>**/cli/rover-test1.nq</exclude>
+                    < resources ALv2 compatible resources >
+                    <exclude>**/css/bootstrap.min.css</exclude>
+                    <exclude>**/js/bootstrap-modal.js</exclude>
+                    <exclude>**/js/jquery-1.7.2.min.js</exclude>
+                    <exclude>**/missing-og-namespace.html</exclude>
+                  </excludes>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin-->
+
+          <!-- Attaches a sources jar (jar-no-fork goal) whose manifest carries the build details. -->
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-source-plugin</artifactId>
+            <version>${maven-source-plugin.version}</version>
+            <executions>
+              <execution>
+                <id>attach-sources</id>
+                <configuration>
+                  <archive>
+                    <!-- Extra manifest attributes, filled in from build properties -->
+                    <manifestEntries>
+                      <Implementation-Build>${implementation.build}</Implementation-Build>
+                      <Implementation-Build-Date>${maven.build.timestamp}</Implementation-Build-Date>
+                      <X-Compile-Source-JDK>${javac.src.version}</X-Compile-Source-JDK>
+                      <X-Compile-Target-JDK>${javac.target.version}</X-Compile-Target-JDK>
+                    </manifestEntries>
+                    <manifest>
+                      <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+                      <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+                    </manifest>
+                  </archive>
+                </configuration>
+                <goals>
+                  <goal>jar-no-fork</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+
+          <!-- Attaches a javadoc jar (jar goal) whose manifest carries the build details. -->
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-javadoc-plugin</artifactId>
+            <version>${maven-javadoc-plugin.version}</version>
+            <executions>
+              <execution>
+                <id>attach-javadocs</id>
+                <configuration>
+                  <quiet>true</quiet>
+                  <source>8</source>
+                  <archive>
+                    <!-- Extra manifest attributes, filled in from build properties -->
+                    <manifestEntries>
+                      <Implementation-Build>${implementation.build}</Implementation-Build>
+                      <Implementation-Build-Date>${maven.build.timestamp}</Implementation-Build-Date>
+                      <X-Compile-Source-JDK>${javac.src.version}</X-Compile-Source-JDK>
+                      <X-Compile-Target-JDK>${javac.target.version}</X-Compile-Target-JDK>
+                    </manifestEntries>
+                    <manifest>
+                      <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+                      <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+                    </manifest>
+                  </archive>
+                </configuration>
+                <goals>
+                  <goal>jar</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+
+          <!-- Artifact signing: the sign goal is bound to the verify phase. -->
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-gpg-plugin</artifactId>
+            <version>${maven-gpg-plugin.version}</version>
+            <executions>
+              <execution>
+                <id>sign-artifacts</id>
+                <goals>
+                  <goal>sign</goal>
+                </goals>
+                <phase>verify</phase>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
+    <profile>
+      <id>site</id>
+
+      <!-- Site deployment target; the URL is supplied by the site.urlDeployment property. -->
+      <distributionManagement>
+        <site>
+          <id>any23.website</id>
+          <name>Apache Any23 website</name>
+          <url>${site.urlDeployment}</url>
+        </site>
+      </distributionManagement>
+
+      <build>
+        <plugins>
+          <!--
+            Site generation. The reports are declared inline through reportPlugins
+            in this plugin's configuration.
+          -->
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-site-plugin</artifactId>
+            <version>${maven-site-plugin.version}</version>
+            <!-- Velocity 1.5 is added to the site plugin's own classpath -->
+            <dependencies>
+              <dependency>
+                <groupId>org.apache.velocity</groupId>
+                <artifactId>velocity</artifactId>
+                <version>1.5</version>
+              </dependency>
+            </dependencies>
+            <configuration>
+              <locales>en</locales>
+              <reportPlugins>
+                <!--
+                  Project information reports, pinned to 2.4 here rather than using
+                  the maven-project-info-reports-plugin.version property.
+                  NOTE(review): the report names below (mailing-list, project-team,
+                  cim, issue-tracking, license) differ from those in the top-level
+                  reporting section; presumably they follow the 2.4 naming. Confirm
+                  before changing the version.
+                -->
+                <plugin>
+                  <groupId>org.apache.maven.plugins</groupId>
+                  <artifactId>maven-project-info-reports-plugin</artifactId>
+                  <version>2.4</version>
+                  <configuration>
+                    <dependencyDetailsEnabled>false</dependencyDetailsEnabled>
+                    <dependencyLocationsEnabled>false</dependencyLocationsEnabled>
+                    <anonymousConnection>scm:git:http://git.apache.org/any23.git</anonymousConnection>
+                    <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/any23.git</developerConnection>
+                    <webAccessUrl>https://gitbox.apache.org/repos/asf/any23.git</webAccessUrl>
+                  </configuration>
+                  <reportSets>
+                    <reportSet>
+                      <reports>
+                        <report>index</report>
+                        <report>mailing-list</report>
+                        <report>project-team</report>
+                        <report>scm</report>
+                        <report>cim</report>
+                        <report>issue-tracking</report>
+                        <report>license</report>
+                      </reports>
+                    </reportSet>
+                  </reportSets>
+                </plugin>
+
+                <!-- Javadoc for the site: only the aggregate report is configured -->
+                <plugin>
+                  <groupId>org.apache.maven.plugins</groupId>
+                  <artifactId>maven-javadoc-plugin</artifactId>
+                  <version>${maven-javadoc-plugin.version}</version>
+                  <configuration>
+                    <source>8</source>
+                  </configuration>
+                  <reportSets>
+                    <reportSet>
+                      <reports>
+                        <report>aggregate</report>
+                      </reports>
+                    </reportSet>
+                  </reportSets>
+                </plugin>
+              </reportPlugins>
+            </configuration>
+          </plugin>
+
+          <!-- Site publication through SCM; locations come from the site.* properties. -->
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-scm-publish-plugin</artifactId>
+            <version>1.0-beta-2</version>
+            <configuration>
+              <content>${site.filePath}</content>
+              <pubScmUrl>${site.deploymentBaseUrl}</pubScmUrl>
+              <checkoutDirectory>${site.scmPubCheckoutDirectory}</checkoutDirectory>
+              <checkinComment>Apache Any23 site deployment</checkinComment>
+              <tryUpdate>true</tryUpdate>
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
+    <!--
+      On JDK 9 and 10 the java.xml.bind module still ships with the JDK but is
+      not resolved by default, so it is added to the test JVM arguments through
+      the surefire-extra-args property. The range stops before 11, where the
+      module is no longer part of the JDK and the option would be rejected.
+    -->
+    <profile>
+      <id>java-9-surefire</id>
+      <activation>
+        <jdk>[9,11)</jdk>
+      </activation>
+      <properties>
+        <surefire-extra-args>--add-modules java.xml.bind</surefire-extra-args>
+      </properties>
+    </profile>
+  </profiles>
+
+</project>
+