You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2017/08/23 20:26:58 UTC

[01/15] any23 git commit: ANY23-304 Add extractor for OpenIE

Repository: any23
Updated Branches:
  refs/heads/master 72035bf63 -> c40b7888b


http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/test-resources/src/test/resources/org/apache/any23/extractor/openie/example-openie.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/org/apache/any23/extractor/openie/example-openie.html b/test-resources/src/test/resources/org/apache/any23/extractor/openie/example-openie.html
new file mode 100644
index 0000000..5211bd5
--- /dev/null
+++ b/test-resources/src/test/resources/org/apache/any23/extractor/openie/example-openie.html
@@ -0,0 +1,638 @@
+<!DOCTYPE html>
+<!--[if IEMobile 7]><html class="iem7"  lang="en" dir="ltr" class="no-js"><![endif]-->
+<!--[if lte IE 6]><html class="lt-ie9 lt-ie8 lt-ie7"  lang="en" dir="ltr" class="no-js"><![endif]-->
+<!--[if (IE 7)&(!IEMobile)]><html class="lt-ie9 lt-ie8"  lang="en" dir="ltr" class="no-js"><![endif]-->
+<!--[if IE 8]><html class="lt-ie9"  lang="en" dir="ltr" class="no-js"><![endif]-->
+<!--[if (gte IE 9)|(gt IEMobile 7)]><!--><html  lang="en" dir="ltr" class="no-js"><!--<![endif]-->
+
+<head profile="http://www.w3.org/1999/xhtml/vocab">
+	<meta charset="utf-8"></meta>
+<!--[if lt IE 9]><script src="sites/all/themes/podaac/js/html5shiv.js" />
+</script><![endif]--><link rel="shortcut icon" href="http://podaac.jpl.nasa.gov/sites/default/files/favicon.ico" type="image/vnd.microsoft.icon" />
+<link rel="canonical" href="/aquarius" />
+<meta name="Generator" content="Drupal 7 (http://drupal.org)"></meta>
+<link rel="shortlink" href="/node/406" />
+	<title>AQUARIUS | PO.DAAC</title>
+
+	<meta name="MobileOptimized" content="width"></meta>
+	<meta name="HandheldFriendly" content="true"></meta>
+	<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"></meta>
+	<meta http-equiv="cleartype" content="on"></meta>
+
+	<style>
+@import url("http://podaac.jpl.nasa.gov/modules/system/system.base.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/system/system.messages.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/system/system.theme.css?ohzcdw");
+</style>
+<style>
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.core.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.theme.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.menu.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.autocomplete.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.button.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.resizable.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.dialog.min.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/themes/base/minified/jquery.ui.tabs.min.css?ohzcdw");
+</style>
+<style>
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/simplenews/simplenews.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/aggregator/aggregator.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/date/date_api/date.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/date/date_popup/themes/datepicker.1.7.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/field/theme/field.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/menu_attach_block/menu_attach_block.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/node/node.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/custom/podaac_helper/css/podaac.sortable.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/search/search.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/modules/user/user.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/youtube/css/youtube.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/views/css/views.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/ckeditor/css/ckeditor.css?ohzcdw");
+</style>
+<style>
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/contrib/ctools/css/ctools.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/custom/faceted_browse/resources/css/faceted-browse-table-dialog.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/custom/faceted_browse/resources/css/faceted-browse-menu.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/custom/podaac_helper/css/podaac.jpl-banner.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/modules/custom/podaac_helper/css/podaac.search.css?ohzcdw");
+</style>
+<style>
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/normalize.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/tabs.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/pages.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/blocks.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/navigation.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/views-styles.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/nodes.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/comments.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/forms.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/fields.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/content.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/print.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/front.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/layouts/fixed-width.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/layouts/responsive-header.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/layouts/responsive-sidebars.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/layouts/responsive-footer.css?ohzcdw");
+@import url("http://podaac.jpl.nasa.gov/sites/all/themes/podaac/css/tophat.css?ohzcdw");
+</style>
+	<script src="http://podaac.jpl.nasa.gov/sites/all/libraries/modernizr/modernizr.min.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/jquery/1.10/jquery.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/misc/jquery.once.js?v=1.2"></script>
+<script src="http://podaac.jpl.nasa.gov/misc/drupal.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.core.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.widget.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.effect.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.position.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.menu.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.autocomplete.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.effect-slide.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.button.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.mouse.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.draggable.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.resizable.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.dialog.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/contrib/jquery_update/replace/ui/ui/minified/jquery.ui.tabs.min.js?v=1.10.2"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/podaac_helper/js/podaac.sortable.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/captcha/captcha.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/faceted_browse/resources/js/jquery/jquery.tablesorter.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/faceted_browse/resources/js/faceted-browse-autocomplete.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/faceted_browse/resources/js/faceted-browse-menu.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/faceted_browse/resources/js/faceted-browse-table-dialog.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/podaac_helper/js/podaac.search.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/themes/podaac/js/navigation.js?ohzcdw"></script>
+<script src="http://podaac.jpl.nasa.gov/sites/all/themes/podaac/js/podaac.js?ohzcdw"></script>
+<script>jQuery.extend(Drupal.settings, {"basePath":"\/","pathPrefix":"","ajaxPageState":{"theme":"podaac","theme_token":"zS22V5NLa5jHbmp7exru4sNS9tcLbNcEcmJG9wOmIVU","css":{"modules\/system\/system.base.css":1,"modules\/system\/system.menus.css":1,"modules\/system\/system.messages.css":1,"modules\/system\/system.theme.css":1,"misc\/ui\/jquery.ui.core.css":1,"misc\/ui\/jquery.ui.theme.css":1,"misc\/ui\/jquery.ui.menu.css":1,"misc\/ui\/jquery.ui.autocomplete.css":1,"misc\/ui\/jquery.ui.button.css":1,"misc\/ui\/jquery.ui.resizable.css":1,"misc\/ui\/jquery.ui.dialog.css":1,"misc\/ui\/jquery.ui.tabs.css":1,"sites\/all\/modules\/contrib\/simplenews\/simplenews.css":1,"modules\/aggregator\/aggregator.css":1,"sites\/all\/modules\/contrib\/date\/date_api\/date.css":1,"sites\/all\/modules\/contrib\/date\/date_popup\/themes\/datepicker.1.7.css":1,"modules\/field\/theme\/field.css":1,"sites\/all\/modules\/contrib\/menu_attach_block\/menu_attach_block.css":1,"modules\/node\/node.css":1,"sites\/a
 ll\/modules\/custom\/podaac_helper\/css\/podaac.sortable.css":1,"modules\/search\/search.css":1,"modules\/user\/user.css":1,"sites\/all\/modules\/contrib\/youtube\/css\/youtube.css":1,"sites\/all\/modules\/contrib\/views\/css\/views.css":1,"sites\/all\/modules\/contrib\/ckeditor\/css\/ckeditor.css":1,"sites\/all\/modules\/contrib\/ctools\/css\/ctools.css":1,"sites\/all\/modules\/custom\/faceted_browse\/resources\/css\/faceted-browse-table-dialog.css":1,"sites\/all\/modules\/custom\/faceted_browse\/resources\/css\/faceted-browse-menu.css":1,"sites\/all\/modules\/custom\/podaac_helper\/css\/podaac.jpl-banner.css":1,"sites\/all\/modules\/custom\/podaac_helper\/css\/podaac.search.css":1,"sites\/all\/themes\/podaac\/system.menus.css":1,"sites\/all\/themes\/podaac\/css\/normalize.css":1,"sites\/all\/themes\/podaac\/css\/wireframes.css":1,"sites\/all\/themes\/podaac\/css\/page-backgrounds.css":1,"sites\/all\/themes\/podaac\/css\/tabs.css":1,"sites\/all\/themes\/podaac\/css\/pages.css":1,"s
 ites\/all\/themes\/podaac\/css\/blocks.css":1,"sites\/all\/themes\/podaac\/css\/navigation.css":1,"sites\/all\/themes\/podaac\/css\/views-styles.css":1,"sites\/all\/themes\/podaac\/css\/nodes.css":1,"sites\/all\/themes\/podaac\/css\/comments.css":1,"sites\/all\/themes\/podaac\/css\/forms.css":1,"sites\/all\/themes\/podaac\/css\/fields.css":1,"sites\/all\/themes\/podaac\/css\/content.css":1,"sites\/all\/themes\/podaac\/css\/print.css":1,"sites\/all\/themes\/podaac\/css\/front.css":1,"sites\/all\/themes\/podaac\/css\/layouts\/fixed-width.css":1,"sites\/all\/themes\/podaac\/css\/layouts\/responsive-header.css":1,"sites\/all\/themes\/podaac\/css\/layouts\/responsive-sidebars.css":1,"sites\/all\/themes\/podaac\/css\/layouts\/responsive-footer.css":1,"sites\/all\/themes\/podaac\/css\/tophat.css":1},"js":{"sites\/all\/libraries\/modernizr\/modernizr.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/jquery\/1.10\/jquery.min.js":1,"misc\/jquery.once.js":1,"misc\/drupal.js":1,"
 sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.core.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.widget.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.effect.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.position.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.menu.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.autocomplete.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.effect-slide.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.button.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.mouse.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.dragga
 ble.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.resizable.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.dialog.min.js":1,"sites\/all\/modules\/contrib\/jquery_update\/replace\/ui\/ui\/minified\/jquery.ui.tabs.min.js":1,"sites\/all\/modules\/custom\/podaac_helper\/js\/podaac.sortable.js":1,"sites\/all\/modules\/custom\/captcha\/captcha.js":1,"sites\/all\/modules\/custom\/faceted_browse\/resources\/js\/jquery\/jquery.tablesorter.js":1,"sites\/all\/modules\/custom\/faceted_browse\/resources\/js\/faceted-browse-autocomplete.js":1,"0":1,"sites\/all\/modules\/custom\/faceted_browse\/resources\/js\/faceted-browse-menu.js":1,"sites\/all\/modules\/custom\/faceted_browse\/resources\/js\/faceted-browse-table-dialog.js":1,"sites\/all\/modules\/custom\/podaac_helper\/js\/podaac.search.js":1,"sites\/all\/themes\/podaac\/js\/navigation.js":1,"sites\/all\/themes\/podaac\/js\/podaac.js":1}},"urlIsAjaxTrus
 ted":{"\/aquarius":true},"PODAAC":{"autocompleteuri":"\/api\/autocomplete"},"podaac_search":{"default_text":"Search"}});</script>
+</head>
+<body class="html not-front not-logged-in one-sidebar sidebar-second page-node page-node- page-node-406 node-type-mission section-aquarius" >
+		
+<div id="eosdis-tophat-container"></div>
+<div id="page">
+  <div id="header-container">
+    <header id="header" role="banner">
+  
+        <div class="region region-header">
+    <div id="block-podaac-helper-podaac-jpl-banner" class="block block-podaac-helper first odd">
+
+  <div class="block-content">
+              
+    <div id="JPL-banner">
+  
+  <div id="JPL-home">
+    <a href="http://www.nasa.gov" id="JPL-NASA-link"></a>    
+    <a href="http://www.jpl.nasa.gov" id="JPL-home-link">Jet Propulsion Laboratory</a>    
+    <a href="http://www.caltech.edu/" id="JPL-caltech-link">California Institute of Technology</a>  </div>
+  
+  <div id="JPL-link-container">
+    <ul id="JPL-links"><li class="jpl-home first"><a href="http://www.jpl.nasa.gov/index.cfm">JPL Home</a></li>
+<li class="jpl-earth"><a href="http://www.jpl.nasa.gov/earth/index.cfm">Earth</a></li>
+<li class="jpl-solar-system"><a href="http://www.jpl.nasa.gov/solar-system/index.cfm">Solar System</a></li>
+<li class="jpl-stars-galaxy"><a href="http://www.jpl.nasa.gov/stars-galaxies/index.cfm">Stars &amp; Galaxies</a></li>
+<li class="jpl-science-tech last"><a href="http://scienceandtechnology.jpl.nasa.gov/">Science &amp; Technology</a></li>
+</ul>    
+    <div id="JPL-icon-container">
+      <span id="JPL-icon-slogan">Bring the Universe to You</span>
+      
+      <ul id="JPL-icons"><li class="jpl-mail first"><a href="http://jpl.kintera.org/signup/" class="jpl-icon jpl-email-news-icon"></a></li>
+<li class="jpl-facebook"><a href="http://www.facebook.com/NASAJPL" class="jpl-icon jpl-facebook-icon"></a></li>
+<li class="jpl-twitter"><a href="http://twitter.com/NASAJPL" class="jpl-icon jpl-twitter-icon"></a></li>
+<li class="jpl-gplus"><a href="http://plus.google.com/113315419190905475766/posts" class="jpl-icon jpl-gplus-icon"></a></li>
+<li class="jpl-youtube"><a href="http://www.youtube.com/user/JPLnews" class="jpl-icon jpl-youtube-icon"></a></li>
+<li class="jpl-itunes"><a href="http://itunes.apple.com/podcast/hd-nasas-jet-propulsion-laboratory/id262254981" class="jpl-icon jpl-itunes-icon"></a></li>
+<li class="jpl-ustream"><a href="http://www.ustream.tv/nasajpl2" class="jpl-icon jpl-ustream-icon"></a></li>
+<li class="jpl-rss"><a href="http://www.jpl.nasa.gov/rss/" class="jpl-icon jpl-rss-icon"></a></li>
+<li class="jpl-blog"><a href="http://blogs.jpl.nasa.gov/" class="jpl-icon jpl-blog-icon"></a></li>
+<li class="jpl-mobile last"><a href="http://www.jpl.nasa.gov/onthego/" class="jpl-icon jpl-mobile-icon"></a></li>
+</ul>    </div>
+  </div>
+  <div class="clearfix podaac-clear"></div>
+  
+</div><!-- /.JPL-banner -->
+
+  </div>
+
+</div><!-- /.block -->
+<div id="block-podaac-helper-podaac-search" class="block block-podaac-helper even">
+
+  <div class="block-content">
+              
+    <form action="http://podaac.jpl.nasa.gov/helper/search" method="GET" id="podaac-search-form" accept-charset="UTF-8"><div><div class="form-item form-type-select form-item-type">
+  <label class="element-invisible" for="edit-type">Search Type </label>
+ <select class="not-chosen form-select" id="edit-type" name="type"><option value="website">Website</option><option value="dataset" selected="selected">Data</option></select>
+</div>
+<div class="form-item form-type-textfield form-item-search">
+  <label class="element-invisible" for="edit-search">Search </label>
+ <input title="Enter the terms you wish to search for." type="text" id="edit-search" name="search" value="Search" size="15" maxlength="128" class="form-text" /> <span class="field-suffix"><a href="/DatasetSearchHelp" id="podaac-search-help"><img src="http://podaac.jpl.nasa.gov/sites/all/modules/custom/podaac_helper/images/search-help.png" width="15" height="15" alt="" /></a></span>
+</div>
+</div></form>  </div>
+
+</div><!-- /.block -->
+<div id="block-block-14" class="block block-block last odd">
+
+  <div class="block-content">
+              
+    
+<!-- <div class="messages error" style="background: #fef5f1; color: #8c2e0b; border: 1px solid #ed541d; border-radius: 7px;">
+<h3 class="rtecenter">PO.DAAC Scheduled Hardware Maintenance on Thursday, February 16, 2017, from 2pm to 4pm (PST).</h3>
+
+<p class="rtecenter">PO.DAAC HiTIDE, LAS and MCC Tools will be temporarily unavailable during the scheduled maintenance window. We apologize for the inconvenience.</p>
+</div>
+-->
+
+<!-- OLD EXAMPLE <div class="messages error" style="background: #fef5f1; color: #8c2e0b; border: 1px solid #ed541d; border-radius: 7px;">
+<h3 class="rtecenter">JPL Scheduled Network Maintenance</h3>
+
+<p class="rtecenter"><font><span style="font-size:8pt">Please be informed that PO.DAAC public services may experienced intermittent downtime during the JPL Scheduled Network Maintenance window this Wednesday, June 15, 2016 ( 4am to 7am).</span></font></p>
+<p>All PO.DAAC public services (FTP, SecureFTP, OPeNDAP, THREDDS, Web Services, SOTO, HiTIDE, ...) will be unavailable and all data stream processing will be disrupted during the Scheduled Hardware Maintenance window.&nbsp;&nbsp; PO.DAAC Web Portal will be available for informational content access only (no data access).&nbsp;&nbsp; Users are encouraged to halt any automated processing software for data access on that day.&nbsp;&nbsp; We apologize for the inconvenience.</p>
+</div>
+#fefff1
+<div class="messages error" style="background: #fef5f1; color: #8c2e0b; border: 1px solid #ed541d; border-radius: 7px;">
+<h3 class="rtecenter">PO.DAAC Dataset Discovery is temporarily unavailable due to maintenance. We apologize for the inconvenience.</h3>
+</div>
+-->  </div>
+
+</div><!-- /.block -->
+  </div><!-- /.region -->
+  
+              <hgroup id="name-and-slogan">
+                      <a href="/" title="Home" rel="home" id="logo"><img src="http://podaac.jpl.nasa.gov/sites/all/themes/podaac/logo.png" alt="Home" /></a>
+                    
+                      <h1 id="site-name">
+              <a href="/" title="Home" rel="home"><span>PO.DAAC</span></a>
+            </h1>
+            
+                  </hgroup><!-- /#name-and-slogan -->
+        
+            
+      <div id="header-satellite"></div>
+      
+      <div id="podaac-follow-us">
+        <span id="podaac-follow-us-trigger">Follow Us</span>
+        <ul id="podaac-follow-us-links">
+          <li><a href="https://www.facebook.com/podaac" class="podaac-follow-us-icon" id="podaac-facebook" target="_blank" title="PODAAC Facebook"></a></li>
+          <li><a href="http://www.youtube.com/user/NASAJPLPODAAC" class="podaac-follow-us-icon" id="podaac-youtube" target="_blank" title="PODAAC YouTube"></a></li>
+          <li><a href="https://twitter.com/podaac" class="podaac-follow-us-icon" id="podaac-twitter" target="_blank" title="PODAAC Twitter"></a></li>
+        </ul>
+      </div>
+    </header>
+    
+    <div id="header-banner"></div>
+  </div>
+
+  <div id="main-container">
+    <div id="content-backdrop">
+      <div id="backdrop">
+        <div id="backdrop-gradient"></div>
+      </div>
+    </div>
+    
+    <div id="main">
+    <!--[if lt IE 8]>
+      <div id="browser-compatibility">
+        You are viewing PODAAC in an incompatible browser and may not have full usability of the site; 
+        to view the list of compatible browsers please visit our <a href="/browser-compatibility">FAQ</a>      </div>
+    <![endif]-->
+  
+      <div id="navigation">
+  
+          
+          <div class="region region-navigation">
+    <div id="block-menu-block-1" class="block block-menu-block first last odd" role="navigation">
+
+  <div class="block-content">
+              <h2 class="block-title">Navigation</h2>
+          
+    <div class="menu-block-wrapper menu-block-1 menu-name-menu-podaac-nav parent-mlid-0 menu-level-1">
+  <ul class="menu"><li class="first leaf menu-mlid-298"><a href="http://podaac.jpl.nasa.gov/" title="">Home</a></li>
+<li class="leaf menu-mlid-299"><a href="http://podaac.jpl.nasa.gov/datasetlist" title="Dataset Discovery">Dataset Discovery</a></li>
+<li class="expanded menu-mlid-349"><a href="http://podaac.jpl.nasa.gov/dataaccess" title="Data Access">Data Access</a><ul class="menu"><li class="first leaf menu-mlid-497"><a href="ftp://podaac-ftp.jpl.nasa.gov/" title="File Transfer Protocol (FTP)
+">FTP</a></li>
+<li class="leaf menu-mlid-380"><a href="http://opendap.jpl.nasa.gov/opendap/" title="Open-source Project for a Network Data Access Protocol (OPeNDAP)">OPeNDAP</a></li>
+<li class="leaf menu-mlid-729"><a href="http://thredds.jpl.nasa.gov" title="Thematic Real-time Environmental Distributed Data Services (THREDDS) Data Server">THREDDS</a></li>
+<li class="leaf menu-mlid-3212"><a href="http://podaac.jpl.nasa.gov/ws" title="PO.DAAC Web Services">Web Services</a></li>
+<li class="leaf menu-mlid-718"><a href="http://podaac-tools.jpl.nasa.gov/soto-2d/soto.html" title="SOTO (State of the Ocean)">SOTO (State of the Ocean)</a></li>
+<li class="leaf menu-mlid-987"><a href="http://podaac-tools.jpl.nasa.gov/hitide" title="High-level Tool for Interactive Data Extraction/L2 Subsetting Services (HiTIDE/L2SS)">HiTIDE (L2 Subsetting)</a></li>
+<li class="leaf menu-mlid-379"><a href="http://podaac-tools.jpl.nasa.gov/las" title="Live Access Server (LAS) ">LAS (L3 Subsetting)</a></li>
+<li class="leaf menu-mlid-721"><a href="http://podaac.jpl.nasa.gov/podaac_labs" title="New Ideas, Prototypes and Tools">PO.DAAC Labs</a></li>
+<li class="expanded menu-mlid-3213"><a href="http://podaac.jpl.nasa.gov/podaac_uat" title="PO.DAAC User Acceptance Tools">PO.DAAC UAT</a><ul class="menu"><li class="first leaf menu-mlid-3813"><a href="http://podaac-uat.jpl.nasa.gov/soto" title="">UAT SOTO (State of the Ocean)</a></li>
+<li class="leaf menu-mlid-3731"><a href="http://podaac-uat.jpl.nasa.gov/mcc" title="Online tool and web service designed to check and validate the contents of netCDF and HDF granules for the Climate and Forecast (CF) and Attribute Convention for Dataset Discovery (ACDD) metadata conventions. ">UAT Metadata Compliance Checker (MCC)</a></li>
+<li class="last leaf menu-mlid-3732"><a href="http://podaac-uat.jpl.nasa.gov/drive" title="FTP alternative way of browsing data at PO.DAAC">UAT PO.DAAC Drive</a></li>
+</ul></li>
+<li class="last expanded menu-mlid-3274"><span title="Earth Observing System Data and Information System (EOSDIS) Tools" class="nolink">EOSDIS Tools</span><ul class="menu"><li class="first leaf menu-mlid-498"><a href="http://reverb.echo.nasa.gov/reverb/" title="NASA Metadata and Service Discovery Tool">Reverb</a></li>
+<li class="last leaf menu-mlid-499"><a href="http://gcmd.nasa.gov/" title="Global Change Master Directory (GCMD)">GCMD</a></li>
+</ul></li>
+</ul></li>
+<li class="expanded menu-mlid-546"><a href="http://podaac.jpl.nasa.gov/CoreMeasurements" title="Core Measurements Description">Measurements</a><ul class="menu"><li class="first expanded menu-mlid-862"><a href="http://podaac.jpl.nasa.gov/gravity" title="Gravity">Gravity</a><ul class="menu"><li class="first last leaf menu-mlid-2023"><a href="http://podaac.jpl.nasa.gov/GRACE" title="GRACE">GRACE</a></li>
+</ul></li>
+<li class="expanded menu-mlid-378"><a href="http://podaac.jpl.nasa.gov/SeaSurfaceSalinity" title="Sea Surface Salinity">Sea Surface Salinity</a><ul class="menu"><li class="first leaf active menu-mlid-2027"><a href="http://podaac.jpl.nasa.gov/aquarius" title="Aquarius" class="active">AQUARIUS</a></li>
+<li class="leaf menu-mlid-3806"><a href="http://podaac.jpl.nasa.gov/SMAP" title="Soil Moisture Active Passive (SMAP)">SMAP</a></li>
+<li class="last leaf menu-mlid-3277"><a href="http://podaac.jpl.nasa.gov/SPURS" title="">SPURS</a></li>
+</ul></li>
+<li class="expanded menu-mlid-373"><a href="http://podaac.jpl.nasa.gov/SeaSurfaceTemperature" title="Sea Surface Temperature (SST)">Sea Surface Temperature (SST)</a><ul class="menu"><li class="first leaf menu-mlid-2026"><a href="http://podaac.jpl.nasa.gov/AQUA" title="">AQUA</a></li>
+<li class="leaf menu-mlid-700"><a href="http://podaac.jpl.nasa.gov/AVHRR-Pathfinder" title="AVHRR-Pathfinder  (Sea Surface Temperature)">AVHRR-Pathfinder</a></li>
+<li class="leaf menu-mlid-701"><a href="http://podaac.jpl.nasa.gov/GHRSST" title="GHRSST  (Sea Surface Temperature)">GHRSST</a></li>
+<li class="leaf menu-mlid-2031"><a href="http://podaac.jpl.nasa.gov/MODIS" title="MODIS">MODIS</a></li>
+<li class="last leaf menu-mlid-2019"><a href="http://podaac.jpl.nasa.gov/Terra" title="Terra">TERRA</a></li>
+</ul></li>
+<li class="leaf menu-mlid-375"><a href="http://podaac.jpl.nasa.gov/OceanCurrentsCirculation" title="Ocean Currents &amp; Circulation">Ocean Currents &amp; Circulation</a></li>
+<li class="expanded menu-mlid-863"><a href="http://podaac.jpl.nasa.gov/OceanSurfaceTopography" title="Ocean Surface Topography">Ocean Surface Topography</a><ul class="menu"><li class="first leaf menu-mlid-934"><a href="http://podaac.jpl.nasa.gov/JASON1">JASON 1</a></li>
+<li class="leaf menu-mlid-500"><a href="http://podaac.jpl.nasa.gov/OSTM-JASON2" title="OSTM - JASON 2">OSTM - JASON 2</a></li>
+<li class="leaf menu-mlid-3729"><a href="http://podaac.jpl.nasa.gov/JASON3" title="">JASON 3</a></li>
+<li class="leaf menu-mlid-2035"><a href="http://podaac.jpl.nasa.gov/SeaSAT" title="Seasat">Seasat</a></li>
+<li class="leaf menu-mlid-2039"><a href="http://podaac.jpl.nasa.gov/TOPEX-POSEIDON" title="TOPEX-POSEIDON">TOPEX-POSEIDON</a></li>
+<li class="last leaf menu-mlid-2042"><a href="http://podaac.jpl.nasa.gov/GEOS-3" title="">GEOS-3</a></li>
+</ul></li>
+<li class="expanded menu-mlid-377"><a href="http://podaac.jpl.nasa.gov/OceanWind" title="Ocean Wind">Ocean Wind</a><ul class="menu"><li class="first leaf menu-mlid-2379"><a href="http://podaac.jpl.nasa.gov/ADEOS-II" title="">ADEOS-II</a></li>
+<li class="leaf menu-mlid-2025"><a href="http://podaac.jpl.nasa.gov/AQUA">AQUA</a></li>
+<li class="leaf menu-mlid-3799"><a href="http://podaac.jpl.nasa.gov/CYGNSS" title="CYGNSS">CYGNSS</a></li>
+<li class="leaf menu-mlid-3795"><a href="http://podaac.jpl.nasa.gov/ISS-RapidScat">ISS-RapidScat</a></li>
+<li class="leaf menu-mlid-2029"><a href="http://podaac.jpl.nasa.gov/MEaSUREs" title="MEaSUREs">MEaSUREs</a></li>
+<li class="leaf menu-mlid-668"><a href="http://podaac.jpl.nasa.gov/MetOp" title="MetOp">MetOp</a></li>
+<li class="leaf menu-mlid-669"><a href="http://podaac.jpl.nasa.gov/NSCAT" title="NSCAT">NSCAT</a></li>
+<li class="leaf menu-mlid-2034"><a href="http://podaac.jpl.nasa.gov/QuikSCAT" title="QuikSCAT">QuikSCAT</a></li>
+<li class="leaf menu-mlid-2036"><a href="http://podaac.jpl.nasa.gov/SeaSAT" title="">Seasat</a></li>
+<li class="last leaf menu-mlid-2038"><a href="http://podaac.jpl.nasa.gov/SSMI" title="SSM/I (Ocean Wind)">SSM/I</a></li>
+</ul></li>
+<li class="last expanded menu-mlid-873"><a href="http://podaac.jpl.nasa.gov/SeaIce" title="Sea Ice">Sea Ice</a><ul class="menu"><li class="first leaf menu-mlid-2024"><a href="http://podaac.jpl.nasa.gov/ADEOS-II">ADEOS-II</a></li>
+<li class="leaf menu-mlid-1675"><a href="http://podaac.jpl.nasa.gov/NSCAT" title="">NSCAT</a></li>
+<li class="leaf menu-mlid-2018"><a href="http://podaac.jpl.nasa.gov/QuikSCAT" title="QuikSCAT">QuikSCAT</a></li>
+<li class="last leaf menu-mlid-2381"><a href="http://podaac.jpl.nasa.gov/SeaSAT" title="">Seasat</a></li>
+</ul></li>
+</ul></li>
+<li class="expanded active-trail menu-mlid-699"><a href="http://podaac.jpl.nasa.gov/missions" title="PO.DAAC Related Missions" class="active-trail">Missions</a><ul class="menu"><li class="first expanded active-trail menu-mlid-2393"><span title="" class="active-trail nolink">NASA Missions</span><ul class="menu"><li class="first leaf menu-mlid-2382"><a href="http://podaac.jpl.nasa.gov/ADEOS-II" title="">ADEOS-II</a></li>
+<li class="leaf menu-mlid-2383"><a href="http://podaac.jpl.nasa.gov/AQUA" title="">AQUA</a></li>
+<li class="leaf active-trail active menu-mlid-2384"><a href="http://podaac.jpl.nasa.gov/aquarius" title="" class="active-trail active">AQUARIUS</a></li>
+<li class="leaf menu-mlid-3800"><a href="http://podaac.jpl.nasa.gov/CYGNSS" title="CYGNSS">CYGNSS</a></li>
+<li class="leaf menu-mlid-2021"><a href="http://podaac.jpl.nasa.gov/GEOS-3" title="GEOS-3">GEOS-3</a></li>
+<li class="leaf menu-mlid-2022"><a href="http://podaac.jpl.nasa.gov/GHRSST" title="GHRSST">GHRSST</a></li>
+<li class="leaf menu-mlid-2385"><a href="http://podaac.jpl.nasa.gov/GRACE" title="">GRACE</a></li>
+<li class="leaf menu-mlid-3797"><a href="http://podaac.jpl.nasa.gov/ISS-RapidScat" title="">ISS-RapidScat</a></li>
+<li class="leaf menu-mlid-2028"><a href="http://podaac.jpl.nasa.gov/JASON1" title="JASON 1">JASON 1</a></li>
+<li class="leaf menu-mlid-3730"><a href="http://podaac.jpl.nasa.gov/JASON3" title="">JASON 3</a></li>
+<li class="leaf menu-mlid-2386"><a href="http://podaac.jpl.nasa.gov/MEaSUREs" title="">MEaSUREs</a></li>
+<li class="leaf menu-mlid-2387"><a href="http://podaac.jpl.nasa.gov/MODIS" title="">MODIS</a></li>
+<li class="leaf menu-mlid-2032"><a href="http://podaac.jpl.nasa.gov/NSCAT" title="NSCAT (Ocean Wind)">NSCAT</a></li>
+<li class="leaf menu-mlid-2033"><a href="http://podaac.jpl.nasa.gov/OSTM-JASON2" title="OSTM-JASON 2 (Ocean Surface Topography)">OSTM - JASON 2</a></li>
+<li class="leaf menu-mlid-2380"><a href="http://podaac.jpl.nasa.gov/QuikSCAT" title="">QuikSCAT</a></li>
+<li class="leaf menu-mlid-2389"><a href="http://podaac.jpl.nasa.gov/SeaSAT" title="">Seasat</a></li>
+<li class="leaf menu-mlid-3807"><a href="http://podaac.jpl.nasa.gov/SMAP" title="">SMAP</a></li>
+<li class="leaf menu-mlid-3276"><a href="http://podaac.jpl.nasa.gov/SPURS">SPURS</a></li>
+<li class="leaf menu-mlid-2390"><a href="http://podaac.jpl.nasa.gov/Terra" title="">TERRA</a></li>
+<li class="last leaf menu-mlid-2040"><a href="http://podaac.jpl.nasa.gov/TOPEX-POSEIDON" title="">TOPEX-POSEIDON</a></li>
+</ul></li>
+<li class="last expanded menu-mlid-2394"><span title="" class="nolink">Non-NASA Missions</span><ul class="menu"><li class="first leaf menu-mlid-2020"><a href="http://podaac.jpl.nasa.gov/AVHRR-Pathfinder" title="AVHRR-Pathfinder">AVHRR-Pathfinder</a></li>
+<li class="leaf menu-mlid-2030"><a href="http://podaac.jpl.nasa.gov/MetOp" title="MetOp">MetOp</a></li>
+<li class="last leaf menu-mlid-2037"><a href="http://podaac.jpl.nasa.gov/SSMI" title="SSM/I (Ocean Wind)">SSM/I</a></li>
+</ul></li>
+</ul></li>
+<li class="expanded menu-mlid-351"><a href="http://podaac.jpl.nasa.gov/AnimationsImages" title="Ocean Stories, Dataset Highlights, Images and Animations">Multimedia</a><ul class="menu"><li class="first collapsed menu-mlid-2348"><a href="http://podaac.jpl.nasa.gov/OceanEvents" title="Ocean Stories Highlighting PO.DAAC Datasets">Ocean Stories</a></li>
+<li class="leaf menu-mlid-469"><a href="http://podaac.jpl.nasa.gov/highlights" title="Dataset Highlights">Dataset Highlights</a></li>
+<li class="leaf menu-mlid-2064"><a href="http://podaac.jpl.nasa.gov/AnimationsImages/Animations" title="Animations">Animations</a></li>
+<li class="last leaf menu-mlid-2391"><a href="http://podaac.jpl.nasa.gov/AnimationsImages/Images" title="Images">Images</a></li>
+</ul></li>
+<li class="expanded menu-mlid-352"><a href="http://podaac.jpl.nasa.gov/UserCommunity" title="PO.DAAC Information Relevant to the User Community">Community</a><ul class="menu"><li class="first leaf menu-mlid-476"><a href="http://podaac.jpl.nasa.gov/announcements" title="PO.DAAC Related Announcements">Announcements</a></li>
+<li class="leaf menu-mlid-437"><a href="http://podaac.jpl.nasa.gov/events" title="Events in the Oceanography Community">Events</a></li>
+<li class="leaf menu-mlid-2536"><a href="http://podaac.jpl.nasa.gov/system-alerts" title="System Alerts">System Alerts</a></li>
+<li class="collapsed menu-mlid-2551"><a href="http://podaac.jpl.nasa.gov/spotlight" title="PO.DAAC Activities in the Oceanography Community">Spotlight</a></li>
+<li class="leaf menu-mlid-2699"><a href="http://podaac.jpl.nasa.gov/PO.DAAC_DataManagementPractices" title="Data Best Practices">Data Best Practices</a></li>
+<li class="leaf menu-mlid-493"><a href="http://podaac.jpl.nasa.gov/education" title="Education">Education</a></li>
+<li class="leaf menu-mlid-494"><a href="http://podaac.jpl.nasa.gov/MailingList" title="Mailing List">Mailing List</a></li>
+<li class="leaf menu-mlid-495"><a href="http://podaac.jpl.nasa.gov/UserWorkingGroup" title="User Working Group (UWG)">User Working Group (UWG)</a></li>
+<li class="last leaf menu-mlid-3808"><a href="http://podaac.jpl.nasa.gov/MeetingsandWorkshops" title="Meetings and Workshops">Meetings and Workshops</a></li>
+</ul></li>
+<li class="leaf menu-mlid-717"><a href="http://podaac.jpl.nasa.gov/forum" title="PO.DAAC Forum">Forum</a></li>
+<li class="last expanded menu-mlid-354"><a href="http://podaac.jpl.nasa.gov/help" title="Additional PO.DAAC Information">About</a><ul class="menu"><li class="first leaf menu-mlid-391"><a href="http://podaac.jpl.nasa.gov/AboutPodaac" title="About Us">About Us</a></li>
+<li class="leaf menu-mlid-485"><a href="http://podaac.jpl.nasa.gov/Acronyms" title="Acronyms">Acronyms</a></li>
+<li class="leaf menu-mlid-383"><a href="http://podaac.jpl.nasa.gov/Glossary" title="Glossary">Glossary</a></li>
+<li class="leaf menu-mlid-537"><a href="http://podaac.jpl.nasa.gov/DataTerminologyandFormats" title="Data Terminology">Data Terminology</a></li>
+<li class="leaf menu-mlid-488"><a href="http://podaac.jpl.nasa.gov/CitingPODAAC" title="PO.DAAC Data Citation and Acknowledgements">Data Citation</a></li>
+<li class="last leaf menu-mlid-3146"><a href="http://podaac.jpl.nasa.gov/metrics" title="PO.DAAC Metrics">PO.DAAC Metrics</a></li>
+</ul></li>
+</ul></div>
+  </div>
+
+</div><!-- /.block -->
+  </div><!-- /.region -->
+      
+        <div class="clearfix"></div>
+  
+      </div><!-- /#navigation -->
+      
+      <div id="content-container">
+        
+              
+        <div id="content-container">
+          <div class="region-content">
+            <div id="content" class="column" role="main">
+                                          <a id="main-content"></a>
+                                            <h1 class="title" id="page-title">AQUARIUS</h1>
+                                                                                    
+
+
+<article class="node-406 node node-mission view-mode-full clearfix">
+
+  
+  <div class="field field-name-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><p><strong>Mission Specification &amp; Status</strong></p>
+
+<p>The Aquarius/SAC-D mission, launched on June 10, 2011,  is a joint venture between NASA and the Argentinean Space Agency (CONAE). The mission features the sea surface salinity sensor Aquarius and is the first mission with the primary goal of measuring sea surface salinity (SSS) from space. Data from Aquarius will play a large role in understanding both climate change and the global water cycle.</p>
+
+<p>On June 7, 2015 at 12:53:17 UTC the Aquarius/SAC-D observatory suffered a mission-ending hardware failure resulting in the permanent cessation of data flows.  The entire Aquarius data record spans a full 3 year, 9 month period from 8/25/2011 – 6/7/2015.  Version 4.0 of the Aquarius data is the Official NASA end-of-prime mission data for the Aquarius/SAC-D mission.  While no further forward processing of data is possible, a release of a V5.0 end-of-mission dataset is expected in future.<br /><img alt="The Aquarius/SAC-D satellite" height="202" src="//podaac.jpl.nasa.gov/sites/default/files/content/Aquarius_SAC-D.JPG" title="The Aquarius/SAC-D satellite" width="330" /><br />
+This instrument carries 3 radiometers, and 1 scatterometer.  They are operating at 1.4 GHz &amp; 1.2 GHz respectively.  The data collected by the radiometer are being used together with sea surface temperature collected from another platform(s), to derive salinity data.  This is corrected for surface roughness using data from the Aquarius scatterometer.</p>
+
+<p>The satellite will cross the equator at 6am and pm.  The Aquarius instrument will continuously point away from the sun to avoid glint.<br />
+ <br />
+PO.DAAC will be providing Level 2 SSS data as well as gridded Level 3 degree SSS products generated by the Aquarius Ground Segment at Goddard.  Level 3 products will be produced with temporal resolutions of daily, 8 day, monthly, 3 months, and annual.  Monthly and seasonal climatology products from Aquarius are also available. The Aquarius instrument will provide global coverage every 7 days. The spatial resolution at Level 2 will be approximately 100km.  L3 products are gridded at 1 degree spatial resolution.<br />
+ </p>
+</div></div></div>
+  
+  
+</article><!-- /.node -->
+<!-- block__no_wrapper -->
+<div id="block-cck-blocks-field-block" class="block block-cck-blocks last even">
+
+  <div class="block-content">
+              
+    <div class="field field-name-field-block field-type-field-collection field-label-hidden"><div class="field-items"><div class="field-item even"><div class="entity entity-field-collection-item field-collection-item-field-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">News and Announcements</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><p><strong>OFFICIAL NASA AQUARIUS/SAC-D VERSION 4.0 END-OF-PRIME-MISSION DATA SET RELEASED</strong></p>
+
+<p>July 17, 2015</p>
+
+<p>The PO.DAAC is pleased to announce the availability of the version 4.0 Aquarius/SAC-D data.  This is the official NASA/Aquarius Project end-of-prime-mission dataset spanning the complete 3 year, 9 month period of Aquarius science data availability, from August 25, 2011 through June 7, 2015 when an unrecoverable hardware failure caused the end of the mission. This end-of-prime mission dataset does not preclude future reprocessing.  An updated version (V5.0) is planned for release in 2016, and subsequent updates will be released when measurable improvements are achieved.</p>
+
+<p>Data sets comprising this release include the Level 2 orbital data and Level 3 mapped salinity, wind speed, and derived density products at 1 degree spatial resolution for ascending, descending and combined passes and for the following time intervals: daily, 7 day, monthly, seasonal, annual.  New products (added since V3.0), in addition to Density, include 7-day and 28-day running mean products plus seasonal and monthly climatology datasets. Included as part of v4.0 are also a complementary set of similarly gridded L3 ancillary SST products. A summary of improvements with this new version of the Aquarius data is available <a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/Aquarius_V4.0-V3.0_SummaryOfChanges.pdf">here</a>. All users are advised to work with v4.0 over any previous versions.   </p>
+
+<p>The Aquarius v4.0 data sets are described and discoverable via the <a href="//podaac.jpl.nasa.gov/datasetlist?search=aquarius%20%252Bproject">PO.DAAC data portal</a>.   Access to these data is via PO.DAAC’s public FTP site:   <a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/">ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/</a> .  The data are also accessible via a range of PO.DAAC tools and services: <a href="http://opendap.jpl.nasa.gov/opendap/SalinityDensity/aquarius/">OPeNDAP</a>, <a href="http://thredds.jpl.nasa.gov/thredds/podaac_catalogs/AQUARIUS_L3_SMI_V20_catalog.html">THREDDS</a>, <a href="//podaac.jpl.nasa.gov/aquarius/gallery">Aquarius Level 3 Browser</a>, <a href="//podaac.jpl.nasa.gov/las">LAS</a>, the <a href="http://podaac-tools.jpl.nasa.gov/hitide/">HiTIDE</a> L2 subsetter and associated <a href="//podaac.jpl.nasa.gov/ws/">Web Services</a>.</p>
+
+<p>The <em>Aquarius Data Users Guide</em>, <em>Aquarius Data Validation Document</em>, and other primary technical <a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs">documentation</a> , are available from the FTP site together with <a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/sw">reader software</a>.   General information regarding Aquarius/SAC-D mission is available from the <a href="http://aquarius.nasa.gov">mission website</a> and also via PO.DAAC’s <a href="//podaac.jpl.nasa.gov/aquarius">Aquarius</a>  and <a href="//podaac.jpl.nasa.gov/SeaSurfaceSalinity">salinity</a> webpages.</p>
+
+<p>Should you have any questions, please contact us at: <a href="mailto:podaac@podaac.jpl.nasa.gov">podaac@podaac.jpl.nasa.gov</a></p>
+
+<p> </p>
+
+<hr /><p><strong>NASA's Aquarius Sea Surface Salinity</strong></p>
+
+<p><iframe align="middle" frameborder="0" height="408" scrolling="no" src="http://svs.gsfc.nasa.gov/vis/a000000/a004200/a004234/aquarius_sss_3yrs_SpinningGlobe_north_1080p.mp4" width="720"></iframe></p>
+
+<p><em>This video provides a global tour of sea surface salinity using measurements taken by NASA's Aquarius instrument aboard the Aquarius/SAC-D spacecraft over the period September 2011 through September 2014. Red represents areas of high salinity, while blue represents areas of low salinity. Aquarius is a focused effort to measure sea surface salinity and provide the global view of salinity variability needed for climate studies. The mission has been a collaboration between NASA and the Space Agency of Argentina (Comisión Nacional de Actividades Espaciales).</em></p>
+
+<p><em>This video is public domain and can be downloaded at: </em><a href="http://svs.gsfc.nasa.gov/cgi-bin/details.cgi?aid=4234" target="_blank">http://svs.gsfc.nasa.gov/cgi-bin/details.cgi?aid=4234</a></p>
+
+<hr /><p>See all "<a href="http://aquarius.nasa.gov/news-status.html" style="line-height: 1.6em;" target="_blank">Official Aquarius/SAC-D</a>" Announcements</p>
+
+<p>See all <a href="//podaac.jpl.nasa.gov/announcements" style="line-height: 1.6em;" target="_blank">PO.DAAC Announcements</a></p>
+</div></div></div>  </div>
+</div>
+</div></div></div>  </div>
+
+</div><!-- /.block -->
+<!-- region__no_wrapper -->
+            </div><!-- /#content -->
+          </div>
+        </div>
+  
+          
+                  <aside class="sidebars">
+                      
+                        <div class="region-sidebar-second-container">
+                <section class="region region-sidebar-second column sidebar">
+    <div id="block-cck-blocks-field-right-sidebar-block" class="block block-cck-blocks first last odd">
+
+  <div class="block-content">
+              
+    <div class="field field-name-field-right-sidebar-block field-type-field-collection field-label-hidden"><div class="field-items"><div class="field-item even"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Data Links</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><ul><li><a href="//podaac.jpl.nasa.gov/datasetlist?search=aquarius%20%252Bproject"><strong>Browse Datasets for Aquarius Project Data</strong> <strong>at PO.DAAC</strong></a></li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/" target="_blank">PO.DAAC FTP Data Access</a></li>
+	<li><a href="http://nsidc.org/data/aquarius/index.html" target="_blank">Aquarius Soil Moisture Data at NSIDC</a></li>
+</ul></div></div></div>  </div>
+</div>
+</div><div class="field-item odd"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">PO.DAAC Tools and Services</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><ul><li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/" target="_blank"><strong>FTP</strong></a></li>
+	<li><strong><a href="http://podaac-opendap.jpl.nasa.gov/opendap/allData/aquarius/" target="_blank">OPeNDAP</a></strong></li>
+	<li><strong>THREDDS:  </strong><a href="http://thredds.jpl.nasa.gov/thredds/catalog/ncml_aggregation/SalinityDensity/aquarius/catalog.html" target="_blank">Salinity/Density</a>, <a href="http://thredds.jpl.nasa.gov/thredds/catalog/ncml_aggregation/OceanWinds/aquarius/catalog.html" target="_blank">Ocean Winds</a></li>
+	<li><a href="//podaac.jpl.nasa.gov/ws/" target="_blank"><strong>PODAAC-WS</strong></a></li>
+	<li><strong><a href="//podaac.jpl.nasa.gov/aquarius/gallery">Aquarius Level 3 Image Browser</a></strong></li>
+	<li><a href="//podaac.jpl.nasa.gov/las" target="_blank"><strong>LAS</strong></a></li>
+	<li><a href="http://podaac-tools.jpl.nasa.gov/hitide/" target="_blank"><strong>HiTIDE</strong></a></li>
+</ul></div></div></div>  </div>
+</div>
+</div><div class="field-item even"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Technical Documentation</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><ul><li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-010-UG-0008_AquariusUserGuide_DatasetV4.0.pdf" target="_blank">Aquarius V4.0 Data Users Guide</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/Aquarius_V4.0-V3.0_SummaryOfChanges.pdf" target="_blank">Aquarius V4.0 Summary of Improvements</a> (.pdf)   </li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0007_AquariusLevel1Aspecification_DatasetVersion3.0.pdf" target="_blank">L1A Data Specification</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0018_AquariusLevel2specification_DatasetVersion4.0.pdf" target="_blank">L2 V4.0 Data format description </a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0018_Ocean_Level-3_Standard_Mapped_Image_Products.pdf" target="_blank">L3 V4.0 Data format description</a>  (.pdf)    </li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0016_AquariusSalinityDataValidationAnalysis_DatasetVersion4.0and3.0.pdf" target="_blank">Aquarius V4.0 and V3.0 Salinity data validation analysis</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0016_AquariusSalinityDataValidationAnalysis_DatasetVersion2.0.pdf" target="_blank">Aquarius V2.0 Salinity data validation analysis</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v3/AQ-014-PS-0006_ProposalForFlags&amp;Masks_DatasetVersion3.0.pdf" target="_blank">Aquarius V3.0 Flags &amp; Masks</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0017_AquariusATBD_Level2.pdf" target="_blank">L2-ATBD  (Algorithm Theoretical Basis Document)</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0017_AquariusATBD_Level2_Addendum1.pdf" target="_blank">L2-ATBD Addendum 1</a>  (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0017_AquariusATBD_Level2_Addendum2_27Feb2013.pdf" target="_blank">L2-ATBD Addendum  2</a>  (.pdf) </li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v3/AQ-014-PS-0017_AquariusATBD_Level2_Addendum3_DatasetVersion3.0.pdf" target="_blank">L2-ATBD Addendum  3  - includes V3.0 algorithm, APC &amp; calibration updates</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0017_AquariusATBD_Level2_Addendum4_DatasetVersion4.0.pdf" target="_blank"><span>L2-ATBD</span> Addendum  4  - includes V4.0 algorithm updates</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0017_AquariusATBD_uncertainties_Addendum5_DatasetVersion4.0.pdf">Aquarius Salinity Uncertainty estimation for V4.0</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0017_AquariusATBD_ComputationOfAquariusSeaSurfaceDensity_DatasetVersion4.0.pdf">Aquarius-Derived Sea Surface Density estimation for V4.0</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v3/AQ-014-PS-0017_Aquarius_AntennaPatternCoefficientUpdates_DatasetVersion3.0.pdf" target="_blank">APC update memo for V3.0</a></li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0019_AquariusATBD_scatterometer.pdf" target="_blank">Radar ATBD  and Radar RFI algorithm</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0015_AquariusInstrumentCalibratrionDescriptionDocument.pdf" target="_blank">Radiometer calibration methodology with RFI algorithm description</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AQ-014-PS-0020_AquariusPointingAnalysisRSS-TR-021313.pdf" target="_blank">Pointing correction analysis</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v4/AQ-014-PS-0017_AquariusScatterometerCalibrationReview.pdf">Radar calibration report</a> (.pdf)</li>
+	<li><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/v2/AquariusLevel3_GriddingSmoothingPaper_Lilly&amp;Lagerloef2008.pdf" target="_blank">L3 smoothing algorithm description </a>(.pdf)</li>
+</ul><p><strong>Note:</strong> All Aquarius technical documents, including for prior versions, are available <a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/docs/" target="_blank">here</a>  </p>
+</div></div></div>  </div>
+</div>
+</div><div class="field-item odd"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Known Issues</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><ul><li><strong><a href="ftp://podaac-ftp.jpl.nasa.gov/allData/aquarius/README.KnownIssues.txt" target="_blank">Aquarius Data Issues</a></strong></li>
+</ul></div></div></div>  </div>
+</div>
+</div><div class="field-item even"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Educational</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><p><strong>"ADVICE": A four-part webinar series on Aquarius Data  hosted by NASA/EOSDIS</strong></p>
+
+<ul><li><a class="ext" href="https://wiki.earthdata.nasa.gov/display/advice/ADVICE+Training+Home" target="_blank">Webinar Information &amp; Training Materials</a></li>
+	<li><a class="ext" href="https://youtu.be/q1tTjZB-jD8" target="_blank">NASA Aquarius Data Training Session #1</a></li>
+	<li><a class="ext" href="https://youtu.be/GVKni0xF-N4" target="_blank">NASA Aquarius Data Training Session #2</a></li>
+	<li><a class="ext" href="https://youtu.be/IAKvkeX-skA" target="_blank">NASA Aquarius Data Training Session #3</a></li>
+	<li><a class="ext" href="https://youtu.be/elEH-12bRVU" target="_blank">NASA Aquarius Data Training Session #4</a>  </li>
+</ul><p><strong>Video Tutorials on PO.DAAC Aquarius Data Services</strong></p>
+
+<ul><li><a href="//podaac.jpl.nasa.gov/forum/viewtopic.php?f=20&amp;t=375">Discovering Aquarius Data</a> (<a href="//podaac.jpl.nasa.gov/forum/viewtopic.php?f=20&amp;t=375">Video</a> , <a href="ftp://podaac.jpl.nasa.gov/misc/web/tutorials/aquarius-tutorials/advice_data%20discover.PDF">pdf</a>)</li>
+	<li><a href="//podaac.jpl.nasa.gov/forum/viewtopic.php?f=20&amp;t=376">Accessing Aquarius Data</a>  (<a href="//podaac.jpl.nasa.gov/forum/viewtopic.php?f=20&amp;t=376">Video</a> , <a href="ftp://podaac.jpl.nasa.gov/misc/web/tutorials/aquarius-tutorials/advice_data%20access.PDF">pdf</a>)</li>
+	<li><a href="//podaac.jpl.nasa.gov/forum/viewtopic.php?f=20&amp;t=377">Visualizing &amp; Subsetting</a>   (<a href="//podaac.jpl.nasa.gov/forum/viewtopic.php?f=20&amp;t=377">Video </a>, <a href="ftp://podaac.jpl.nasa.gov/misc/web/tutorials/aquarius-tutorials/LAS%20Quick_Start_Tutorial.pdf">pdf</a>)</li>
+</ul></div></div></div>  </div>
+</div>
+</div><div class="field-item odd"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Related Links</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><ul><li><a href="https://wiki.earthdata.nasa.gov/display/advice/ADVICE+Training+Home" target="_blank">Aquarius/SAC-D Mission Website</a></li>
+	<li><a href="http://aquarius.nasa.gov/data.html" target="_blank">Aquarius Data Access &amp; Telemetry Monitoring Resources</a></li>
+	<li><a href="http://oceancolor.gsfc.nasa.gov/WIKI/AQ(2f)GS.html" target="_blank">Aquarius Ground System WIKI</a></li>
+	<li><a href="http://oceancolor.gsfc.nasa.gov/sdpscgi/public/aquarius_report.cgi" target="_blank">Aquarius Mission Event Log Listings</a></li>
+	<li><a href="http://oceancolor.gsfc.nasa.gov/WIKI/AQ%282f%29GS%282f%29SW%282f%29UpdatesPostLaunch.html" target="_blank">Major Events and Software Change Log</a></li>
+	<li><a href="//podaac.jpl.nasa.gov/SeaSurfaceSalinity">Learn about Sea Surface Salinity</a></li>
+	<li><a href="http://www.jpl.nasa.gov/news/press_kits/aquariusLaunch.pdf" target="_blank">Aquarius/SAC-D Press Kit</a> (.pdf)</li>
+	<li><a href="http://www.jpl.nasa.gov/news/press_kits/Aquarius_SAC-DScienceWritersGuide.pdf" target="_blank">Aquarius Science Writers' Guide </a>(.pdf)</li>
+	<li><a href="http://aquarius.jpl.nasa.gov/AQUARIUS/index.jsp" target="_blank">Jet Propulsion Laboratory Aquarius/SAC-D Mission Education Page</a></li>
+	<li><a href="http://www.esr.org/aquarius_sat/aquarius_main.html" target="_blank">Earth &amp; Space Research Aquarius SAC-D Mission Page</a></li>
+	<li><a href="http://www.conae.gov.ar/" target="_blank">Comision Nacional de Actividades Espaciales (Argentina's Space Agency)</a></li>
+</ul></div></div></div>  </div>
+</div>
+</div><div class="field-item even"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><div><img alt="Aquarius/SAC-D" src="//podaac.jpl.nasa.gov/sites/default/files/content/seawater.jpg" /></div>
+
+<p>Aquarius Mission Launch:<br /><a href="http://www.ustream.tv/recorded/15286484" style="line-height: 1.6em;">http://www.ustream.tv/recorded/15286484</a></p>
+
+<p>Aquarius Post-Launch News Conference:<br /><a href="http://www.ustream.tv/recorded/15288695">http://www.ustream.tv/recorded/15288695</a></p>
+
+<p>Contact:<br /><a href="mailto:Salinity@podaac.jpl.nasa.gov?subject=Aquarius%20Mission">Salinity@podaac.jpl.nasa.gov</a></p>
+</div></div></div>  </div>
+</div>
+</div><div class="field-item odd"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Aquarius Requirements</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><ul><li>390 km swath</li>
+	<li>8 samples per month near equator</li>
+	<li>0.2 psu (0.0002) accuracy after temporal averaging over a month</li>
+	<li>Temporal Resolution 1 month</li>
+	<li>Spatial Resolution 100 km</li>
+</ul></div></div></div>  </div>
+</div>
+</div><div class="field-item even"><div class="entity entity-field-collection-item field-collection-item-field-right-sidebar-block clearfix">
+  <div class="content">
+    <div class="field field-name-field-title field-type-text field-label-hidden"><div class="field-items"><div class="field-item even">Instrument Specifications</div></div></div><div class="field field-name-field-body field-type-text-with-summary field-label-hidden"><div class="field-items"><div class="field-item even"><p><strong><u>Radiometer &amp; Scatterometer</u></strong><br />
+Footprint size - 62x68, 68x62, 75x100<br />
+Footprint size - 76x94, 84x120, 96x156 <br />
+Orbit 657 km Sun Synchronous</p>
+
+<p>    •<strong> <u>Radiometer</u></strong></p>
+
+<p class="rteindent1">Frequency n = 1.413 GHz +/- 12.5 MHz<br />
+Wavelength l = c/n ~ 0.212 m<br />
+L band = 0.39 - 1.55 GHz</p>
+
+<p>    • <strong><u>Scatterometer</u></strong></p>
+
+<p class="rteindent1">Frequency n = 1.26 GHz<br />
+Wavelength l = c/n ~ 0.238 m</p>
+</div></div></div>  </div>
+</div>
+</div></div></div>  </div>
+
+</div><!-- /.block -->
+  </section><!-- region__sidebar -->
+            </div>
+                      </aside><!-- /.sidebars -->
+              </div>
+
+      <div class="clearfix"></div>
+            <div class="clearfix"></div>
+        
+    </div><!-- /#main -->
+  
+  </div><!-- /#main-container -->
+
+</div><!-- /#page -->
+
+  <footer id="bottom" class="region region-bottom">
+    <div id="block-menu-block-2" class="block block-menu-block first odd" role="navigation">
+
+  <div class="block-content">
+              
+    <div class="menu-block-wrapper menu-block-2 menu-name-menu-footer-menu parent-mlid-0 menu-level-1">
+  <ul class="menu"><li class="first leaf menu-mlid-3805"><a href="/rss.xml" title="">RSS Feed</a></li>
+<li class="leaf menu-mlid-1996"><a href="http://www.jpl.nasa.gov/copyrights.cfm" title="">Privacy</a></li>
+<li class="leaf menu-mlid-1997"><a href="/CitingPODAAC" title="">Data Citation</a></li>
+<li class="leaf menu-mlid-1998"><a href="/Glossary" title="">Glossary</a></li>
+<li class="leaf menu-mlid-1999"><a href="/AboutPodaac" title="">About PO.DAAC</a></li>
+<li class="last leaf menu-mlid-2000"><a href="mailto:podaac@podaac.jpl.nasa.gov" title="">Contact</a></li>
+</ul></div>
+  </div>
+
+</div><!-- /.block -->
+<div id="block-simplenews-5" class="block block-simplenews last even">
+
+  <div class="block-content">
+              <h2 class="block-title">PO.DAAC Mailing List</h2>
+          
+    <a name="subscription"></a> 
+<span class="subscription-close"></span>
+  
+
+  <p>Upon successful submission, you will receive a confirmation e-mail in your inbox.</p>
+
+  <form class="simplenews-subscribe" action="/aquarius" method="post" id="simplenews-block-form-5" accept-charset="UTF-8"><div><div class="form-item form-type-textfield form-item-mail">
+  <label for="edit-mail">E-mail <span class="form-required" title="This field is required.">*</span></label>
+ <input type="text" id="edit-mail" name="mail" value="" size="20" maxlength="128" class="form-text required" />
+</div>
+<div class="captcha"><input type="hidden" name="captcha_sid" value="238886364" />
+<input type="hidden" name="captcha_token" value="f92de936550327d21572490a0f361096" />
+<img src="/image_captcha?sid=238886364&amp;ts=1487822471" width="180" height="60" alt="Image CAPTCHA" title="Image CAPTCHA" /><div class="form-item form-type-textfield form-item-captcha-response">
+  <label for="edit-captcha-response">What code is in the image? <span class="form-required" title="This field is required.">*</span></label>
+ <input type="text" id="edit-captcha-response" name="captcha_response" value="" size="15" maxlength="128" class="form-text required" />
+<div class="description">Enter the characters shown in the image.</div>
+</div>
+</div><input type="submit" id="edit-submit--2" name="op" value="Subscribe" class="form-submit" /><input type="hidden" name="form_build_id" value="form-feWDyR_g2l-d9ZABPMc9KKi67IUR4Crf0c1GK0Boxxk" />
+<input type="hidden" name="form_id" value="simplenews_block_form_5" />
+</div></form>  </div>
+
+</div><!-- /.block -->
+  	<div id="subscription-link-container">
+  		Get PO.DAAC Updates by Email <a id="subscription-link" href="#">Subscribe</a>
+  	</div>
+		<div id="podaac-jpl-clearance">
+			<span class="podaac-jpl-clearance-label">Clearance Number:</span> CL05-0770
+		</div>
+  </footer><!-- region__bottom -->
+
+	<!-- NetTracker Page Tag -->
+	<!-- Copyright 2004 Sane Solutions, LLC.	All rights reserved. -->
+		<script src="/sites/all/themes/podaac/net_tracker_files/ntpagetag.js" type="text/javascript"></script>
+	<noscript>
+		<img src="http://ws1.ems.eosdis.nasa.gov/images/ntpagetag.gif?js=0" class="element-invisible" />
+	</noscript>
+
+</body>
+</html>


[14/15] any23 git commit: ANY23-304 Add extractor for OpenIE

Posted by le...@apache.org.
ANY23-304 Add extractor for OpenIE


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/ef146144
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/ef146144
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/ef146144

Branch: refs/heads/master
Commit: ef14614473f608d275eecd4c10b3ab2e50391167
Parents: b39d220
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Aug 23 12:15:56 2017 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Aug 23 12:15:56 2017 -0700

----------------------------------------------------------------------
 cli/pom.xml                                     |  5 +++--
 .../java/org/apache/any23/plugin/PluginIT.java  | 21 ++++++++++++++------
 2 files changed, 18 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/ef146144/cli/pom.xml
----------------------------------------------------------------------
diff --git a/cli/pom.xml b/cli/pom.xml
index 79e8cab..8700f92 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -70,12 +70,13 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
-    <dependency>
+    <!-- Disabled due to memory overhead during test execution -->
+    <!--dependency>
       <groupId>${project.groupId}</groupId>
       <artifactId>apache-any23-openie</artifactId>
       <version>${project.version}</version>
       <scope>runtime</scope>
-    </dependency>
+    </dependency-->
     <dependency>
       <groupId>${project.groupId}.plugins</groupId>
       <artifactId>apache-any23-office-scraper</artifactId>

http://git-wip-us.apache.org/repos/asf/any23/blob/ef146144/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
----------------------------------------------------------------------
diff --git a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
index 1abeb2b..1b69463 100644
--- a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
+++ b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
@@ -41,7 +41,9 @@ import static org.junit.Assert.assertTrue;
  */
 public class PluginIT {
 
-    private static final int NUM_OF_EXTRACTORS = 34;
+    private static final int NUM_OF_EXTRACTORS_INCL_OPENIE = 34;
+    
+    private static final int NUM_OF_EXTRACTORS_EXCL_OPENIE = 33;
 
     private static final String PLUGIN_DIR = "target/plugins-build/";
 
@@ -79,12 +81,19 @@ public class PluginIT {
                 new ExtractorRegistryImpl(),
                 HTML_SCRAPER_TARGET_DIR,  // Required to satisfy class dependencies.
                 HTML_SCRAPER_DEPENDENCY_DIR,
-                OFFICE_SCRAPER_TARGET_DIR
-, OFFICE_SCRAPER_DEPENDENCY_DIR // Required to satisfy class dependencies.
-        );
-        assertEquals("Did not find the number of expected extractors", NUM_OF_EXTRACTORS ,        // HTMLScraper Plugin, OfficeScraper Plugin.
-                extractorGroup.getNumOfExtractors()
+                OFFICE_SCRAPER_TARGET_DIR,
+                OFFICE_SCRAPER_DEPENDENCY_DIR // Required to satisfy class dependencies.
         );
+        try {
+          Class.forName("org.apache.any23.extractor.openie.OpenIEExtractor", false, this.getClass().getClassLoader());
+          assertEquals("Did not find the number of expected extractors", NUM_OF_EXTRACTORS_INCL_OPENIE ,
+                  extractorGroup.getNumOfExtractors()
+          );
+        } catch (ClassNotFoundException e) {
+          assertEquals("Did not find the number of expected extractors", NUM_OF_EXTRACTORS_EXCL_OPENIE ,
+                  extractorGroup.getNumOfExtractors()
+          );
+        }
     }
 
     /**


[10/15] any23 git commit: Resolve all documentation conflicts

Posted by le...@apache.org.
Resolve all documentation conflicts


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/a2d07fc8
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/a2d07fc8
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/a2d07fc8

Branch: refs/heads/master
Commit: a2d07fc857623561b808e96d00378ffd748cb47c
Parents: 1b0c5ff d67bdff
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Mar 1 18:03:09 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Mar 1 18:03:09 2017 -0800

----------------------------------------------------------------------
 src/site/apt/build-src.apt           |  22 +--
 src/site/apt/configuration.apt       |   6 +-
 src/site/apt/dev-data-conversion.apt |   2 +-
 src/site/apt/getting-started.apt     | 214 +++++++++++++++++-------------
 src/site/apt/index.apt               |   9 +-
 src/site/apt/install.apt             |   2 +-
 src/site/apt/supported-formats.apt   |   2 +
 src/site/xdoc/download.xml.vm        | 130 +++++++++---------
 8 files changed, 213 insertions(+), 174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/a2d07fc8/src/site/apt/configuration.apt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/a2d07fc8/src/site/apt/dev-data-conversion.apt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/a2d07fc8/src/site/apt/getting-started.apt
----------------------------------------------------------------------


[11/15] any23 git commit: ANY23-304 merge with master branch

Posted by le...@apache.org.
ANY23-304 merge with master branch


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d4008bc8
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d4008bc8
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d4008bc8

Branch: refs/heads/master
Commit: d4008bc800a4ae9e2cfdcda4fec8c519fbee8ca2
Parents: a2d07fc 72035bf
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jul 26 14:10:34 2017 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jul 26 14:10:34 2017 -0700

----------------------------------------------------------------------
 README.md                                       |  34 ++--
 .../main/java/org/apache/any23/vocab/CSV.java   |   2 +-
 .../java/org/apache/any23/vocab/SINDICE.java    |   1 -
 .../main/java/org/apache/any23/cli/Rover.java   |   4 +-
 .../org/apache/any23/cli/SimpleRoverTest.java   | 126 +++++++++++++++
 .../org/apache/any23/cli/YAMLRoverTest.java     |  79 +++++++++
 cli/src/test/resources/log4j.properties         |  26 +++
 core/pom.xml                                    |  12 +-
 .../extractor/SingleDocumentExtraction.java     |   3 +-
 .../rdf/FunctionalSyntaxExtractor.java          |  53 ++++++
 .../rdf/FunctionalSyntaxExtractorFactory.java   |  59 +++++++
 .../rdf/ManchesterSyntaxExtractor.java          |  53 ++++++
 .../rdf/ManchesterSyntaxExtractorFactory.java   |  59 +++++++
 .../any23/extractor/rdf/RDFParserFactory.java   |  41 +++++
 .../any23/extractor/yaml/YAMLExtractor.java     |   3 +-
 .../any23/writer/CompositeTripleHandler.java    |   4 +-
 .../any23/writer/LoggingTripleHandler.java      |  31 ++--
 .../org.apache.any23.extractor.ExtractorFactory |   2 +
 .../extractor/rdf/example-functionalsyntax.ofn  |   5 +
 .../extractor/rdf/example-manchestersyntax.omn  |   5 +
 .../apache/any23/prefixes/prefixes.properties   |  21 +--
 .../rdf/FunctionalSyntaxExtractorTest.java      |  80 +++++++++
 .../rdf/ManchesterSyntaxExtractorTest.java      |  80 +++++++++
 .../extractor/yaml/YAMLTikaParserTest.java      |  72 +++++++++
 .../apache/any23/mime/TikaMIMETypeDetector.java |   3 +-
 .../main/java/org/apache/any23/vocab/Excel.java |   2 +-
 pom.xml                                         | 162 ++++++++++++++-----
 service/pom.xml                                 |   5 +-
 .../org/apache/any23/servlet/ServletTest.java   |   2 +-
 src/site/xdoc/download.xml.vm                   |  14 +-
 .../owl-functional/example-functionalsyntax.ofn |   5 +
 .../owl-manchester/example-manchestersyntax.omn |   5 +
 32 files changed, 951 insertions(+), 102 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/d4008bc8/cli/src/main/java/org/apache/any23/cli/Rover.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/d4008bc8/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/d4008bc8/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
----------------------------------------------------------------------
diff --cc core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
index 19bccd1,5c73082..1e968c0
--- a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
@@@ -60,10 -61,10 +60,11 @@@ public class YAMLExtractor implements E
      public void run(ExtractionParameters extractionParameters, ExtractionContext context, InputStream in,
              ExtractionResult out)
              throws IOException, ExtractionException {
 -        IRI documentURI = context.getDocumentIRI();
 -        documentRoot = makeUri("root", documentURI, false);
+ 
 -        log.debug("process: {}", documentURI.toString());
 +        IRI documentIRI = context.getDocumentIRI();
 +        documentRoot = RDFUtils.iri(documentIRI.toString() + "root");
 +
 +        log.debug("Processing: {}", documentIRI.toString());
          out.writeNamespace(vocab.PREFIX, vocab.NS);
          out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
          out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);

http://git-wip-us.apache.org/repos/asf/any23/blob/d4008bc8/pom.xml
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/d4008bc8/service/pom.xml
----------------------------------------------------------------------


[05/15] any23 git commit: Create consistent package naming

Posted by le...@apache.org.
Create consistent package naming


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/01abf8f7
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/01abf8f7
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/01abf8f7

Branch: refs/heads/master
Commit: 01abf8f7e7afe8b5f540f1d688c68b8313c405cb
Parents: 0910104
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Thu Feb 23 21:44:31 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Thu Feb 23 21:44:31 2017 -0800

----------------------------------------------------------------------
 .../any23/extractor/openie/OpenIEExtractor.java | 129 +++++++++++++++++++
 .../openie/OpenIEExtractorFactory.java          |  52 ++++++++
 .../apache/any23/openie/OpenIEExtractor.java    | 129 -------------------
 .../any23/openie/OpenIEExtractorFactory.java    |  52 --------
 4 files changed, 181 insertions(+), 181 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/01abf8f7/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
new file mode 100644
index 0000000..b8fda29
--- /dev/null
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.configuration.Configuration;
+import org.apache.any23.configuration.DefaultConfiguration;
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+
+import edu.knowitall.openie.Argument;
+import edu.knowitall.openie.Instance;
+import edu.knowitall.openie.OpenIE;
+import edu.knowitall.tool.parse.ClearParser;
+import edu.knowitall.tool.postag.ClearPostagger;
+import edu.knowitall.tool.srl.ClearSrl;
+import edu.knowitall.tool.tokenize.ClearTokenizer;
+import scala.collection.JavaConversions;
+import scala.collection.Seq;
+
+/**
+ * An <a href="https://github.com/allenai/openie-standalone">OpenIE</a> 
+ * extractor able to generate <i>RDF</i> statements from 
+ * sentences representing relations in the text.
+ */
+public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
+
+    private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
+
+    private IRI documentRoot;
+
+    /**
+     * default constructor
+     */
+    OpenIEExtractor() {
+        // default constructor
+    }
+
+    /**
+     * @see org.apache.any23.extractor.Extractor#getDescription()
+     */
+    @Override
+    public ExtractorDescription getDescription() {
+        return OpenIEExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    public void run(ExtractionParameters extractionParameters,
+            ExtractionContext context, Document in, ExtractionResult out)
+                    throws IOException, ExtractionException {
+
+        IRI documentIRI = context.getDocumentIRI();
+        documentRoot = RDFUtils.iri(documentIRI.toString() + "root");
+        out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
+        out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
+        LOG.debug("Processing: {}", documentIRI.toString());
+
+        OpenIE openIE = new OpenIE(
+                new ClearParser(
+                        new ClearPostagger(
+                                new ClearTokenizer())), new ClearSrl(), false, false);
+
+        Seq<Instance> extractions = null;
+        Tika tika = new Tika();
+        try {
+            extractions = openIE.extract(tika.parseToString(StreamUtils.documentToInputStream(in)));
+        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
+            LOG.error("Encountered error during OpenIE extraction.", e);
+        } catch (TikaException e) {
+            LOG.error("Encountered error whilst parsing InputStream with Tika.", e);
+        }
+
+        List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);
+        // for each extraction instance we can obtain a number of extraction elements
+        // instance.confidence() - a confidence value for the extraction itself
+        // instance.extr().context() - an optional representation of the context for this extraction
+        // instance.extr().arg1().text() - subject
+        // instance.extr().rel().text() - predicate
+        // instance.extr().arg2s().text() - object
+        for(Instance instance : listExtractions) {
+            final Configuration immutableConf = DefaultConfiguration.singleton();
+            if (instance.confidence() > Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"))) {
+                List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
+                for(Argument argument : listArg2s) {
+                    Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);
+                    IRI predicate = (IRI) RDFUtils.makeIRI(instance.extr().rel().text(), documentIRI);
+                    Value object = RDFUtils.toValue(argument.text());
+                    out.writeTriple(subject, predicate, object);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/01abf8f7/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
new file mode 100644
index 0000000..4a1696a
--- /dev/null
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author lewismc
+ *
+ */
+public class OpenIEExtractorFactory extends SimpleExtractorFactory<OpenIEExtractor>
+    implements ExtractorFactory<OpenIEExtractor> {
+
+    public static final String NAME = "openie";
+
+    public static final Prefixes prefixes = null;
+
+    private static final ExtractorDescription descriptionInstance = new OpenIEExtractorFactory();
+
+    public OpenIEExtractorFactory() {
+        super(NAME, prefixes, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), "example-openie.html");
+    }
+
+    @Override
+    public OpenIEExtractor createExtractor() {
+        return new OpenIEExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/01abf8f7/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java
deleted file mode 100644
index b8fda29..0000000
--- a/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.openie;
-
-import java.io.IOException;
-import java.util.List;
-
-import javax.xml.transform.TransformerConfigurationException;
-import javax.xml.transform.TransformerFactoryConfigurationError;
-
-import org.apache.any23.extractor.Extractor;
-import org.apache.any23.configuration.Configuration;
-import org.apache.any23.configuration.DefaultConfiguration;
-import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.util.StreamUtils;
-import org.apache.tika.Tika;
-import org.apache.tika.exception.TikaException;
-import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.Resource;
-import org.eclipse.rdf4j.model.Value;
-import org.eclipse.rdf4j.model.vocabulary.RDF;
-import org.eclipse.rdf4j.model.vocabulary.RDFS;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
-import org.apache.any23.extractor.ExtractionResult;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.w3c.dom.Document;
-
-import edu.knowitall.openie.Argument;
-import edu.knowitall.openie.Instance;
-import edu.knowitall.openie.OpenIE;
-import edu.knowitall.tool.parse.ClearParser;
-import edu.knowitall.tool.postag.ClearPostagger;
-import edu.knowitall.tool.srl.ClearSrl;
-import edu.knowitall.tool.tokenize.ClearTokenizer;
-import scala.collection.JavaConversions;
-import scala.collection.Seq;
-
-/**
- * An <a href="https://github.com/allenai/openie-standalone">OpenIE</a> 
- * extractor able to generate <i>RDF</i> statements from 
- * sentences representing relations in the text.
- */
-public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
-
-    private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
-
-    private IRI documentRoot;
-
-    /**
-     * default constructor
-     */
-    OpenIEExtractor() {
-        // default constructor
-    }
-
-    /**
-     * @see org.apache.any23.extractor.Extractor#getDescription()
-     */
-    @Override
-    public ExtractorDescription getDescription() {
-        return OpenIEExtractorFactory.getDescriptionInstance();
-    }
-
-    @Override
-    public void run(ExtractionParameters extractionParameters,
-            ExtractionContext context, Document in, ExtractionResult out)
-                    throws IOException, ExtractionException {
-
-        IRI documentIRI = context.getDocumentIRI();
-        documentRoot = RDFUtils.iri(documentIRI.toString() + "root");
-        out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
-        out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
-        LOG.debug("Processing: {}", documentIRI.toString());
-
-        OpenIE openIE = new OpenIE(
-                new ClearParser(
-                        new ClearPostagger(
-                                new ClearTokenizer())), new ClearSrl(), false, false);
-
-        Seq<Instance> extractions = null;
-        Tika tika = new Tika();
-        try {
-            extractions = openIE.extract(tika.parseToString(StreamUtils.documentToInputStream(in)));
-        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
-            LOG.error("Encountered error during OpenIE extraction.", e);
-        } catch (TikaException e) {
-            LOG.error("Encountered error whilst parsing InputStream with Tika.", e);
-        }
-
-        List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);
-        // for each extraction instance we can obtain a number of extraction elements
-        // instance.confidence() - a confidence value for the extraction itself
-        // instance.extr().context() - an optional representation of the context for this extraction
-        // instance.extr().arg1().text() - subject
-        // instance.extr().rel().text() - predicate
-        // instance.extr().arg2s().text() - object
-        for(Instance instance : listExtractions) {
-            final Configuration immutableConf = DefaultConfiguration.singleton();
-            if (instance.confidence() > Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"))) {
-                List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
-                for(Argument argument : listArg2s) {
-                    Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);
-                    IRI predicate = (IRI) RDFUtils.makeIRI(instance.extr().rel().text(), documentIRI);
-                    Value object = RDFUtils.toValue(argument.text());
-                    out.writeTriple(subject, predicate, object);
-                }
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/any23/blob/01abf8f7/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java b/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java
deleted file mode 100644
index 4a1696a..0000000
--- a/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.openie;
-
-import java.util.Arrays;
-
-import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.Prefixes;
-
-/**
- * @author lewismc
- *
- */
-public class OpenIEExtractorFactory extends SimpleExtractorFactory<OpenIEExtractor>
-    implements ExtractorFactory<OpenIEExtractor> {
-
-    public static final String NAME = "openie";
-
-    public static final Prefixes prefixes = null;
-
-    private static final ExtractorDescription descriptionInstance = new OpenIEExtractorFactory();
-
-    public OpenIEExtractorFactory() {
-        super(NAME, prefixes, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), "example-openie.html");
-    }
-
-    @Override
-    public OpenIEExtractor createExtractor() {
-        return new OpenIEExtractor();
-    }
-
-    public static ExtractorDescription getDescriptionInstance() {
-        return descriptionInstance;
-    }
-
-}


[15/15] any23 git commit: ANY23-304 skip tests in openie module

Posted by le...@apache.org.
ANY23-304 skip tests in openie module


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/c40b7888
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/c40b7888
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/c40b7888

Branch: refs/heads/master
Commit: c40b7888b9978bc81e6cbe1e05ea77af50367bed
Parents: ef14614
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Aug 23 13:26:23 2017 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Aug 23 13:26:23 2017 -0700

----------------------------------------------------------------------
 openie/pom.xml | 9 +++++++++
 1 file changed, 9 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/c40b7888/openie/pom.xml
----------------------------------------------------------------------
diff --git a/openie/pom.xml b/openie/pom.xml
index 32d4a0c..8596f91 100644
--- a/openie/pom.xml
+++ b/openie/pom.xml
@@ -97,6 +97,15 @@
   </dependencies>
 
   <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <skipTests>true</skipTests>
+        </configuration>
+      </plugin>
+    </plugins>
     <pluginManagement>
       <plugins>
         <plugin>


[04/15] any23 git commit: Make pom relative parents consistent

Posted by le...@apache.org.
Make pom relative parents consistent


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/0910104d
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/0910104d
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/0910104d

Branch: refs/heads/master
Commit: 0910104d64fab9291b5950ebb3f1acb6ca9e7121
Parents: 6871755
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Thu Feb 23 18:06:06 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Thu Feb 23 18:06:06 2017 -0800

----------------------------------------------------------------------
 csvutils/pom.xml | 2 +-
 encoding/pom.xml | 2 +-
 openie/pom.xml   | 2 +-
 service/pom.xml  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/0910104d/csvutils/pom.xml
----------------------------------------------------------------------
diff --git a/csvutils/pom.xml b/csvutils/pom.xml
index 8f5b18d..71146b6 100644
--- a/csvutils/pom.xml
+++ b/csvutils/pom.xml
@@ -22,7 +22,7 @@
     <artifactId>apache-any23</artifactId>
     <groupId>org.apache.any23</groupId>
     <version>2.1-SNAPSHOT</version>
-    <relativePath>..</relativePath>
+    <relativePath>../</relativePath>
   </parent>
 
   <artifactId>apache-any23-csvutils</artifactId>

http://git-wip-us.apache.org/repos/asf/any23/blob/0910104d/encoding/pom.xml
----------------------------------------------------------------------
diff --git a/encoding/pom.xml b/encoding/pom.xml
index 287da3f..e315015 100644
--- a/encoding/pom.xml
+++ b/encoding/pom.xml
@@ -22,7 +22,7 @@
     <artifactId>apache-any23</artifactId>
     <groupId>org.apache.any23</groupId>
     <version>2.1-SNAPSHOT</version>
-    <relativePath>..</relativePath>
+    <relativePath>../</relativePath>
   </parent>
 
   <artifactId>apache-any23-encoding</artifactId>

http://git-wip-us.apache.org/repos/asf/any23/blob/0910104d/openie/pom.xml
----------------------------------------------------------------------
diff --git a/openie/pom.xml b/openie/pom.xml
index 799684d..9745b7a 100644
--- a/openie/pom.xml
+++ b/openie/pom.xml
@@ -23,7 +23,7 @@
     <artifactId>apache-any23</artifactId>
     <groupId>org.apache.any23</groupId>
     <version>2.1-SNAPSHOT</version>
-    <relativePath></relativePath>
+    <relativePath>../</relativePath>
   </parent>
 
   <repositories>

http://git-wip-us.apache.org/repos/asf/any23/blob/0910104d/service/pom.xml
----------------------------------------------------------------------
diff --git a/service/pom.xml b/service/pom.xml
index b4f9426..a1a60ed 100644
--- a/service/pom.xml
+++ b/service/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.any23</groupId>
     <artifactId>apache-any23</artifactId>
     <version>2.1-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
+    <relativePath>../</relativePath>
   </parent>
 
   <artifactId>apache-any23-service</artifactId>


[09/15] any23 git commit: ANY23-304 Address comments from ansell

Posted by le...@apache.org.
ANY23-304 Address comments from ansell


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/1b0c5ff2
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/1b0c5ff2
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/1b0c5ff2

Branch: refs/heads/master
Commit: 1b0c5ff22bb61a9cd992b909c776592a081216e4
Parents: 89d1d85
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Mar 1 17:54:38 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Mar 1 17:54:38 2017 -0800

----------------------------------------------------------------------
 cli/pom.xml                                     | 14 +++---
 .../org/apache/any23/cli/ToolRunnerTest.java    | 15 ++++--
 .../any23/extractor/openie/OpenIEExtractor.java |  5 +-
 .../any23/openie/OpenIEExtractorTest.java       |  1 -
 plugins/basic-crawler/pom.xml                   | 34 +++++++++++++
 src/site/apt/any23-plugins.apt                  | 16 +++----
 src/site/apt/configuration.apt                  | 10 ++--
 src/site/apt/dev-csv-extractor.apt              |  2 +-
 src/site/apt/dev-data-conversion.apt            | 20 ++++----
 src/site/apt/dev-data-extraction.apt            | 20 ++++----
 src/site/apt/dev-microformat-extractors.apt     | 12 ++---
 src/site/apt/dev-validation-fix.apt             | 12 ++---
 src/site/apt/dev-xpath-extractor.apt            |  2 +-
 src/site/apt/extractors.apt                     | 50 ++++++++++----------
 src/site/apt/getting-started.apt                |  2 +-
 src/site/apt/plugin-basic-crawler.apt           |  4 +-
 src/site/apt/plugin-office-scraper.apt          |  2 +-
 17 files changed, 131 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/cli/pom.xml
----------------------------------------------------------------------
diff --git a/cli/pom.xml b/cli/pom.xml
index 3f183ae..79e8cab 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -74,22 +74,20 @@
       <groupId>${project.groupId}</groupId>
       <artifactId>apache-any23-openie</artifactId>
       <version>${project.version}</version>
-    </dependency>
-    <!-- dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>apache-any23-basic-crawler</artifactId>
-      <version>${project.version}</version>
+      <scope>runtime</scope>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
+      <groupId>${project.groupId}.plugins</groupId>
       <artifactId>apache-any23-office-scraper</artifactId>
       <version>${project.version}</version>
+      <scope>runtime</scope>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
+      <groupId>${project.groupId}.plugins</groupId>
       <artifactId>apache-any23-html-scraper</artifactId>
       <version>${project.version}</version>
-    </dependency-->
+      <scope>runtime</scope>
+    </dependency>
     <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
index 881a782..11484bb 100644
--- a/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
@@ -17,7 +17,7 @@
 
 package org.apache.any23.cli;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.IOException;
@@ -34,7 +34,13 @@ import static org.junit.Assert.assertTrue;
  */
 public class ToolRunnerTest {
 
-    private final Set<Class<? extends Tool>> coreTools = new HashSet<Class<? extends Tool>>(){{
+    private final Set<Class<? extends Tool>> coreTools = new HashSet<Class<? extends Tool>>(){
+        /**
+         * 
+         */
+        private static final long serialVersionUID = 1L;
+
+    {
         add(ExtractorDocumentation.class);
         add(MicrodataParser.class);
         add(MimeDetector.class);
@@ -48,7 +54,10 @@ public class ToolRunnerTest {
         Iterator<Tool> tools = new ToolRunner().getToolsInClasspath();
         assertTrue("No core tools have been detected", tools.hasNext());
         while (tools.hasNext()) {
-            assertTrue("Some core tools have not been detected.", coreTools.contains(tools.next().getClass()));
+            assertTrue("Discrepancy between expected and detected tools on classpath. "
+                    + "Expected ExtractorDocumentation.class,"
+                    + "MicrodataParser.class, MimeDetector.class, PluginVerifier.class"
+                    + "Rover.class and VocabPrinter.class.", coreTools.contains(tools.next().getClass()));
         }
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
index bef40de..812ed9c 100644
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
@@ -113,9 +113,10 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
         // instance.extr().arg1().text() - subject
         // instance.extr().rel().text() - predicate
         // instance.extr().arg2s().text() - object
+        final Configuration immutableConf = DefaultConfiguration.singleton();
+        Double threshold = Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"));
         for(Instance instance : listExtractions) {
-            final Configuration immutableConf = DefaultConfiguration.singleton();
-            if (instance.confidence() > Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"))) {
+            if (instance.confidence() > threshold) {
                 List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
                 for(Argument argument : listArg2s) {
                     Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
index 0ba03fd..9dfad94 100644
--- a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
+++ b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -57,7 +57,6 @@ public class OpenIEExtractorTest {
         extractor = null;
     }
 
-    //@Ignore("This typically results in a JVM crash... disabled for the time being.")
     @Test
     public void testExtractFromHTMLDocument() 
       throws IOException, ExtractionException, TripleHandlerException {

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/plugins/basic-crawler/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml
index c9769fd..4fdf257 100644
--- a/plugins/basic-crawler/pom.xml
+++ b/plugins/basic-crawler/pom.xml
@@ -139,6 +139,40 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.0.0</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <manifestEntries>
+                    <Main-Class>org.apache.any23.cli.Crawler</Main-Class>
+                    <Build-Number>${implementation.build}</Build-Number>
+                  </manifestEntries>
+                </transformer>
+              </transformers>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <finalName>${project.artifactId}-uber-${project.version}</finalName>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/any23-plugins.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/any23-plugins.apt b/src/site/apt/any23-plugins.apt
index 7bd297b..f429e2d 100644
--- a/src/site/apt/any23-plugins.apt
+++ b/src/site/apt/any23-plugins.apt
@@ -31,9 +31,9 @@ Apache Any23 Plugins
 
     A plugin is a standard <Maven3> module containing any implementation of
 
-    * {{{./xref/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}}
+    * {{{./apidocs/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}}
 
-    * {{{./xref/org/apache/any23/cli/Tool.html}Tool}}
+    * {{{./apidocs/org/apache/any23/cli/Tool.html}Tool}}
 
 * How to Register a Plugin
 
@@ -50,13 +50,13 @@ export CLASSPATH_PREFIX=../../../plugins/basic-crawler/target/any23-basic-crawle
    * adding its <JAR> to the <$HOME/.any23/plugins> directory.
 
    A plugin can be added to the <Apache Any23 library API> by using the
-   {{{./xref/org/apache/any23/plugin/Any23PluginManager.html}Any23PluginManager}}#createInstance(Configuration configuration, File... pluginLocations)
+   {{{./apidocs/org/apache/any23/plugin/Any23PluginManager.html}Any23PluginManager}}#createInstance(Configuration configuration, File... pluginLocations)
    method.
 
    TODO: plugin support in Apache Any23 Service
 
     Any implementation of <ExtractorPlugin> will automatically registered to the
-    {{{./xref/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
+    {{{./apidocs/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
 
     Any detected implementation of <Tool> will be listed by the <ToolRunner>
     command-line tool in <any23-root/><<bin/any23>> .
@@ -74,7 +74,7 @@ export CLASSPATH_PREFIX=../../../plugins/basic-crawler/target/any23-basic-crawle
 
    An <Extractor Plugin> is a class:
 
-   * implementing the {{{./xref/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}} interface;
+   * implementing the {{{./apidocs/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}} interface;
 
    * packaged under <<org.apache.any23.plugin>> .
 
@@ -107,7 +107,7 @@ public class HTMLScraperPlugin implements ExtractorPlugin {
 
    A <Tool Plugin> is a Java class that:
 
-   * implementing the {{{./xref/org/apache/any23/cli/Tool.html}Tool}} interface;
+   * implementing the {{{./apidocs/org/apache/any23/cli/Tool.html}Tool}} interface;
 
    * CLI parameters are extracted by annotating the class members with {{{http://jcommander.org/}JCommander}} annotations.
 
@@ -152,7 +152,7 @@ public class MyExecutableTool implements Tool {
 
   * Crawler CLI Tool
 
-    The {{{./xref/org/apache/any23/cli/Crawler.html}Crawler CLI Tool}} is an extension of the
-    {{{./xref/org/apache/any23/cli/Rover.html}Rover CLI Tool}} to add site crawling basic
+    The {{{./apidocs/org/apache/any23/cli/Crawler.html}Crawler CLI Tool}} is an extension of the
+    {{{./apidocs/org/apache/any23/cli/Rover.html}Rover CLI Tool}} to add site crawling basic
     capabilities. More information about the <CLI> can be found at
     {{{./getting-started.html#crawler-tool}Getting Started - Crawler Tool}} section.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/configuration.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/configuration.apt b/src/site/apt/configuration.apt
index 63ff1fd..076f806 100644
--- a/src/site/apt/configuration.apt
+++ b/src/site/apt/configuration.apt
@@ -27,8 +27,8 @@ Configuration
     The core module contains the main library code and the command-line implementation.
 
     The main library configuration parameters are managed by the
-    {{{./xref/org/apache/any23/configuration/DefaultConfiguration.html} Configuration}}
-    class. The default values are declared within the {{{http://any23.googlecode.com/svn/trunk/any23-core/src/main/resources/default-configuration.properties} default-configuration.properties}}
+    {{{./apidocs/org/apache/any23/configuration/DefaultConfiguration.html} Configuration}}
+    class. The default values are declared within the {{{https://github.com/apache/any23/blob/master/api/src/main/resources/default-configuration.properties} default-configuration.properties}}
     file. The following sections explain how to override the default configuration.
 
 ** Override Default Configuration from Command-line
@@ -52,7 +52,7 @@ any23-core/bin/$ ANY23_OPTS="-Dany23.http.client.max.connections=10" any23 http:
 
 ** Override Default Configuration Programmatically
 
-    The {{{./xref/org/apache/any23/configuration/Configuration.html} Configuration}}
+    The {{{./apidocs/org/apache/any23/configuration/Configuration.html} Configuration}}
     properties can be accessed in read-only mode just retrieving the configuration <<singleton>> instance.\
     Such instance is <immutable>:
 
@@ -62,7 +62,7 @@ final String propertyValue = immutableConf.getProperty("propertyName", "default
 ...
 +----------------------------------------------------------------------------------------------
 
-    To obtain a <modifiable> {{{./xref/org/apache/any23/configuration/Configuration.html} Configuration}}
+    To obtain a <modifiable> {{{./apidocs/org/apache/any23/configuration/Configuration.html} Configuration}}
     instead it is possible to use the <<copy()>> method.\
     One of the <<Apache Any23>> constructors accepts a <<Configuration>> object that allows to customize the behavior
     of the <<Apache Any23>> instance for its entire life-cycle.
@@ -77,7 +77,7 @@ final Apache Any23 any23 = new Apache Any23(modifiableConf, "extractor1", ...);
 * Use of ExtractionParameters
 
     It is possible to customize the behavior of a single data extraction by providing an
-    {{{./xref/org/apache/any23/extractor/ExtractionParameters.html} ExtractionParameters}}
+    {{{./apidocs/org/apache/any23/extractor/ExtractionParameters.html} ExtractionParameters}}
     instance to one the <Apache Any23#extract()> methods accepting it. <<ExtractionParameters>> allows to customize any <property> and <flag>
     other then the <<specific extraction options>>.\
     If no custom parameters are specified the default configuration values are used.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-csv-extractor.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-csv-extractor.apt b/src/site/apt/dev-csv-extractor.apt
index 7eb7c8a..24b0d6c 100644
--- a/src/site/apt/dev-csv-extractor.apt
+++ b/src/site/apt/dev-csv-extractor.apt
@@ -22,7 +22,7 @@
 
 CSV Extractor Algorithm
 
-  The {{{./xref/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} produces 
+  The {{{./apidocs/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} produces 
   an RDF representation of a CSV file compliant with the {{{http://www.ietf.org/rfc/rfc4180.txt}RFC 4180}} 
   and that foresees an header.
   Such extractor relies on the presence of an header to use the named fields as RDF properties.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-data-conversion.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-data-conversion.apt b/src/site/apt/dev-data-conversion.apt
index 319eadd..1749b9a 100644
--- a/src/site/apt/dev-data-conversion.apt
+++ b/src/site/apt/dev-data-conversion.apt
@@ -47,35 +47,35 @@ Data Conversion
  useful for the transformation. The facade constructor accepts a list of extractor names, if specified
  the extraction will be done only over this list, otherwise the data <MIME Type> will detected and will be applied
  all the compatible extractors declared within the
- {{{./xref/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
+ {{{./apidocs/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
 
  The <<line 2>> defines the input string containing some {{{http://www.w3.org/TeamSubmission/turtle/}Turtle}} data.
 
- At <<line 3>> we instantiate a {{{./xref/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
+ At <<line 3>> we instantiate a {{{./apidocs/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
   specifying a content and a the source <IRI>.
  The <IRI> should be the source of the content data, and must be valid.
- Besides the {{{./xref/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
+ Besides the {{{./apidocs/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
  you can also provide input from other sources, such as <HTTP> requests
- and local files. See the classes in the sources {{{./xref/org/apache/any23/source/package-summary.html}package}}.
+ and local files. See the classes in the sources {{{./apidocs/org/apache/any23/source/package-summary.html}package}}.
 
  The <<line 4>> defines a buffered output stream that will be used to store the data produced by the
  writer declared at <<line 5>>.
 
  A writer stores the extracted triples in some destination.
- We use an {{{./xref/org/apache/any23/writer/NTriplesWriter.html}NTriplesWriter}} here that writes
+ We use an {{{./apidocs/org/apache/any23/writer/NTriplesWriter.html}NTriplesWriter}} here that writes
  into a <<ByteArrayOutputStream>>. The main <<RDF>> formats writers are available and it is possible also to store
  the triples directly into an <<RDF4J>> repository to query them via <<SPARQL>>.
- See {{{./xref/org/apache/any23/writer/RepositoryWriter.html}RepositoryWriter}} and the writer
- {{{./xref/org/apache/any23/writer/package-summary.html}package}}.
+ See {{{./apidocs/org/apache/any23/writer/RepositoryWriter.html}RepositoryWriter}} and the writer
+ {{{./apidocs/org/apache/any23/writer/package-summary.html}package}}.
 
  The extractor method invoked at <<line 6>> performs the metadata extraction.
- This method accepts as first argument a {{{./xref/org/apache/any23/source/DocumentSource.html}DocumentSource}} and as
- second argument a {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}},
+ This method accepts as first argument a {{{./apidocs/org/apache/any23/source/DocumentSource.html}DocumentSource}} and as
+ second argument a {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}},
  that will receive the sequence parsing events generated by the applied extractors. The extract method defines also
  another signature where it is possible to specify a charset encoding for the input data. If <<null>>, the charset
  will be auto detected.
 
- The {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
+ The {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
  this is done safely in a <<finally>> block at <<line 7>>.
 
  The expected output is <UTF-8> encoded at <<line 8>>:

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-data-extraction.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-data-extraction.apt b/src/site/apt/dev-data-extraction.apt
index 2a5bda2..1f67a53 100644
--- a/src/site/apt/dev-data-extraction.apt
+++ b/src/site/apt/dev-data-extraction.apt
@@ -45,21 +45,21 @@ Data Extraction
    the usage of specific extractors.
 
    The <<line 2>> defines the <HTTP User Agent>, used to identify the client during <HTTP> data collection.
-   At <<line 3>> we use the runner to create an instance of {{{./xref/org/apache/any23/http/HTTPClient.html}HTTPClient}},
-   used by {{{./xref/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} for <HTTP> content fetching.
+   At <<line 3>> we use the runner to create an instance of {{{./apidocs/org/apache/any23/http/HTTPClient.html}HTTPClient}},
+   used by {{{./apidocs/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} for <HTTP> content fetching.
 
-   The <<line 4>> instantiates an {{{./xref/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} instance,
-   specifying the {{{./xref/org/apache/any23/http/HTTPClient.html}HTTPClient}} and the URL addressing the content
+   The <<line 4>> instantiates an {{{./apidocs/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} instance,
+   specifying the {{{./apidocs/org/apache/any23/http/HTTPClient.html}HTTPClient}} and the URL addressing the content
    to be processed.
 
    At <<line 5>> we define a buffered output stream used to store data produced by the
-   {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} defined at <<line 6>>.
+   {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} defined at <<line 6>>.
 
    The extraction method at <<line 7>> will run the metadata extraction.
    The produced metadata will be written within the passed
-   {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} instance.
+   {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} instance.
 
-   The {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
+   The {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
    this is done safely in a <<finally>> block at <<line 8>>.
 
    The expected output is <UTF-8> encoded at <<line 9>> and is:
@@ -96,11 +96,11 @@ Filter Out Accidental Triples
    To remove accidental triples <<Apache Any23>> provides a set of useful filters, located
    within the <<org.apache.any23.filter>> package.
 
-   The filter {{{./xref/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.html}IgnoreTitlesOfEmptyDocuments}}
-   removes triples generated by the {{{./xref/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
+   The filter {{{./apidocs/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.html}IgnoreTitlesOfEmptyDocuments}}
+   removes triples generated by the {{{./apidocs/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
    whether the document is empty.
 
-   The filter {{{./xref/org/apache/any23/filter/IgnoreAccidentalRDFa.html}IgnoreAccidentalRDFa}} removes accidental
+   The filter {{{./apidocs/org/apache/any23/filter/IgnoreAccidentalRDFa.html}IgnoreAccidentalRDFa}} removes accidental
    <<CSS>> related triples.
 
 +------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-microformat-extractors.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-microformat-extractors.apt b/src/site/apt/dev-microformat-extractors.apt
index e5db96e..4f03d71 100644
--- a/src/site/apt/dev-microformat-extractors.apt
+++ b/src/site/apt/dev-microformat-extractors.apt
@@ -40,7 +40,7 @@ Microformat Extractors
  More specifically:
 
   * Embedding explicitly the logic within the
-  {{{./xref/org/apache/any23/extractor/html/package-summary.html}Microformats Extractors}}
+  {{{./apidocs/org/apache/any23/extractor/html/package-summary.html}Microformats Extractors}}
 
   * Using the default <<Apache Any23>> nesting feature.
 
@@ -60,7 +60,7 @@ Microformat Extractors
 </span>
 +----------------------------------------------------------------------------------------------
 
- Since, as shown below, the {{{./xref/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}}
+ Since, as shown below, the {{{./apidocs/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}}
  contains the code to handle nested hAddress,
 
 +------------------------------
@@ -101,12 +101,12 @@ private boolean addSubMicroformat(String className, Resource resource, IRI prope
 +-----------------------------------------------------------------------------------------------------
 
  It is higly recommended to decorate the extractors who natively handle the nesting relatioship using the
-  {{{./xref/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation. This annotation,
+  {{{./apidocs/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation. This annotation,
   if present, avoid the production of <nesting_original> and <nesting_structured> RDF statements.
 
-  The following example shows how the {{{./xref/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation
-  could be used to claim the fact that {{{./xref/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}} natively
-  embedds the {{{./xref/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}.
+  The following example shows how the {{{./apidocs/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation
+  could be used to claim the fact that {{{./apidocs/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}} natively
+  embedds the {{{./apidocs/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}.
 
 +----------------------------------------------------------------------------------------------
 @Includes( extractors = AdrExtractor.class )

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-validation-fix.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-validation-fix.apt b/src/site/apt/dev-validation-fix.apt
index 96a9bf2..c588fc2 100644
--- a/src/site/apt/dev-validation-fix.apt
+++ b/src/site/apt/dev-validation-fix.apt
@@ -35,9 +35,9 @@ Validation and Fixing
    This pages describes the <<Apache Any23>> rule-based approach, that allows it to detect, fix and correctly extract
    RDF from those ill-formed RDFa in XHTML pages.
 
-   More specifically, <<Apache Any23>> allows you to write a {{{./xref/org/apache/any23/validator/Rule.html}Rule}}
-   able to detect the errors, a {{{./xref/org/apache/any23/validator/Fix.html}Fix}} containing the logic to fix the problem and a
-   {{{./xref/org/apache/any23/validator/Validator.html}Validator}} which acts as a register of rules and fixes. The Validator
+   More specifically, <<Apache Any23>> allows you to write a {{{./apidocs/org/apache/any23/validator/Rule.html}Rule}}
+   able to detect the errors, a {{{./apidocs/org/apache/any23/validator/Fix.html}Fix}} containing the logic to fix the problem and a
+   {{{./apidocs/org/apache/any23/validator/Validator.html}Validator}} which acts as a register of rules and fixes. The Validator
    calls all the registered rules and when one of them is applied it calls the associated Fix.
 
    The following code snipped shows how to programmatically detect and fix a very common data error with <<Apache Any23>>.
@@ -64,8 +64,8 @@ Validation and Fixing
 </div>
 +------------------------------------------------------------------------------------------
 
-   With the <<Apache Any23>> {{{./xref/org/apache/any23/validator/package-summary.html}Validator}} classes it's possible to solve this
-   problem simply implementing the {{{./xref/org/apache/any23/validator/Rule.html}Rule}} interface as described below:
+   With the <<Apache Any23>> {{{./apidocs/org/apache/any23/validator/package-summary.html}Validator}} classes it's possible to solve this
+   problem simply implementing the {{{./apidocs/org/apache/any23/validator/Rule.html}Rule}} interface as described below:
 
 +------------------------------------------------------------------------------------------
 public class MissingOpenGraphNamespaceRule implements Rule {
@@ -100,7 +100,7 @@ public class MissingOpenGraphNamespaceRule implements Rule {
 }
 +------------------------------------------------------------------------------------------
 
-   The {{{./xref/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.html}MissingOpenGraphNamespaceRule}} inspects the DOM
+   The {{{./apidocs/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.html}MissingOpenGraphNamespaceRule}} inspects the DOM
    structure of the HTML page and if it finds some META tags with some RDFa property (of the OpenGraph Protocol vocabulary, in this case)
    it looks for the declaration of that name space. If there is no declaration it return <<true>>, that means that an error has been detected
    within the document.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-xpath-extractor.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-xpath-extractor.apt b/src/site/apt/dev-xpath-extractor.apt
index 4bae9c2..fc9df63 100644
--- a/src/site/apt/dev-xpath-extractor.apt
+++ b/src/site/apt/dev-xpath-extractor.apt
@@ -28,4 +28,4 @@ XPath Extractor
     activated by a regular expression over the page URL.
     When an extraction rule is activated all the variables it defines are
     evaluated and then a NQuads template is expanded for generating statements.
-    See {{{./xref/org/apache/any23/extractor/xpath/package-summary.html}Javadoc}}.
+    See {{{./apidocs/org/apache/any23/extractor/xpath/package-summary.html}Javadoc}}.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/extractors.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/extractors.apt b/src/site/apt/extractors.apt
index ddce55b..4031b05 100644
--- a/src/site/apt/extractors.apt
+++ b/src/site/apt/extractors.apt
@@ -22,7 +22,7 @@
 
 Apache Any23 Extractors
 
-  This page enlists all the Apache Any23 Extractors (see source code {{{./xref/org/apache/any23/extractor/package-summary.html}package}}).
+  This page enlists all the Apache Any23 Extractors (see source code {{{./apidocs/org/apache/any23/extractor/package-summary.html}package}}).
 
 * Microformat Extractors
 
@@ -31,68 +31,68 @@ Apache Any23 Extractors
       Specific details about *Microformats* extractors can be found {{{./dev-microformat-extractors.html}here}}.
       In particular the *Microformats Nesting* representation policy is described {{{./dev-microformat-extractors.html#microformat-nesting}here}}.
 
-      {{{./xref/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/GeoExtractor.html}GeoExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/GeoExtractor.html}GeoExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HCalendarExtractor.html}HCalendar}}
+      {{{./apidocs/org/apache/any23/extractor/html/HCalendarExtractor.html}HCalendar}}
 
-      {{{./xref/org/apache/any23/extractor/html/HCardExtractor.html}HCard}}
+      {{{./apidocs/org/apache/any23/extractor/html/HCardExtractor.html}HCard}}
 
-      {{{./xref/org/apache/any23/extractor/html/HListingExtractor.html}HListing}}
+      {{{./apidocs/org/apache/any23/extractor/html/HListingExtractor.html}HListing}}
 
-      {{{./xref/org/apache/any23/extractor/html/HResumeExtractor.html}HResume}}
+      {{{./apidocs/org/apache/any23/extractor/html/HResumeExtractor.html}HResume}}
 
-      {{{./xref/org/apache/any23/extractor/html/HReviewExtractor.html}HReview}}
+      {{{./apidocs/org/apache/any23/extractor/html/HReviewExtractor.html}HReview}}
 
-      {{{./xref/org/apache/any23/extractor/html/SpeciesExtractor.html}SpeciesExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/SpeciesExtractor.html}SpeciesExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/LicenseExtractor.html}LicenseExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/LicenseExtractor.html}LicenseExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/XFNExtractor.html}XFNExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/XFNExtractor.html}XFNExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HRecipeExtractor.html}HRecipeExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/HRecipeExtractor.html}HRecipeExtractor}}
 
 *  RDFa [1.0 , 1.1]
 
       The following extractors refer to the {{{http://www.w3.org/TR/rdfa-syntax/}RDFa 1.0}}
       and {{{http://www.w3.org/TR/rdfa-core/}RDFa 1.1}} specifications.
 
-      {{{./xref/org/apache/any23/extractor/rdfa/RDFaExtractor.html}RDFaExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdfa/RDFaExtractor.html}RDFaExtractor}}
 
 * Microdata
 
       The following extractors refer to the {{{http://dev.w3.org/html5/md/}Microdata specifications}}.
 
-      {{{./xref/org/apache/any23/extractor/microdata/MicrodataExtractor.html}MicrodataExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/microdata/MicrodataExtractor.html}MicrodataExtractor}}
 
 *  RDF
 
-      {{{./xref/org/apache/any23/extractor/rdf/RDFXMLExtractor.html}RDFXMLExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/RDFXMLExtractor.html}RDFXMLExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/rdf/NQuadsExtractor.html}NQuadsExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/NQuadsExtractor.html}NQuadsExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/rdf/TurtleExtractor.html}TurtleExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/TurtleExtractor.html}TurtleExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/rdf/NTriplesExtractor.html}NTriplesExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/NTriplesExtractor.html}NTriplesExtractor}}
 
 * Metadata Extractors
 
-      {{{./xref/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HTMLMetaExtractor.html}HTMLMetaExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/HTMLMetaExtractor.html}HTMLMetaExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HeadLinkExtractor.html}HeadLinkExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/HeadLinkExtractor.html}HeadLinkExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/ICBMExtractor.html}ICBMExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/ICBMExtractor.html}ICBMExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/TurtleHTMLExtractor.html}TurtleHTMLExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/TurtleHTMLExtractor.html}TurtleHTMLExtractor}}
 
 * Content Extractors
 
-      {{{./xref/org/apache/any23/extractor/xpath/XPathExtractor.html}XPath Extractor}} (<<Experimental>>)
+      {{{./apidocs/org/apache/any23/extractor/xpath/XPathExtractor.html}XPath Extractor}} (<<Experimental>>)
 
-      {{{./xref/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} (See the extraction {{{./dev-csv-extractor.html}algorithm}}.)
+      {{{./apidocs/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} (See the extraction {{{./dev-csv-extractor.html}algorithm}}.)
 
 Get more documentation
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/getting-started.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/getting-started.apt b/src/site/apt/getting-started.apt
index 5f60b93..75861da 100644
--- a/src/site/apt/getting-started.apt
+++ b/src/site/apt/getting-started.apt
@@ -334,7 +334,7 @@ any23-service$ ./bin/any23server
     from the command line in order to start up the server, then go to {{{http://localhost:8080/}}}
     to access the web interface. A live demo version of such service is running at {{{http://any23.org/}}}.
     You can also start the server from Java by running the
-    {{{./xref/org/apache/any23/servlet/Servlet.html}Apache Any23 Servlet}} class. Maven can be used to create a WAR
+    {{{./apidocs/org/apache/any23/servlet/Servlet.html}Apache Any23 Servlet}} class. Maven can be used to create a WAR
     file for deployment into an existing servlet container such as {{{http://tomcat.apache.org/}Apache Tomcat}}.
 
 * Use <<Apache Any23>> as a Library

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/plugin-basic-crawler.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/plugin-basic-crawler.apt b/src/site/apt/plugin-basic-crawler.apt
index 94828da..dcbcee9 100644
--- a/src/site/apt/plugin-basic-crawler.apt
+++ b/src/site/apt/plugin-basic-crawler.apt
@@ -22,8 +22,8 @@
 
 Basic Crawler Plugin
 
-  The <Basic Crawler Plugin> implements a <CLI> {{{./xref/org/apache/any23/cli/Tool.html}Tool}} extending
-  {{{./xref/org/apache/any23/cli/Rover.html}Rover}} to add <site crawling> capabilities.
+  The <Basic Crawler Plugin> implements a <CLI> {{{./apidocs/org/apache/any23/cli/Tool.html}Tool}} extending
+  {{{./apidocs/org/apache/any23/cli/Rover.html}Rover}} to add <site crawling> capabilities.
 
   The tool can be used to extract semantic content from a small/medium size sites.
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/plugin-office-scraper.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/plugin-office-scraper.apt b/src/site/apt/plugin-office-scraper.apt
index 3508f95..fcdff24 100644
--- a/src/site/apt/plugin-office-scraper.apt
+++ b/src/site/apt/plugin-office-scraper.apt
@@ -24,7 +24,7 @@ Office Scraper Plugins
 
  * <Excel Plugin>
 
-   The {{{./xref/org/apache/any23/plugin/officescraper/ExcelPlugin.html}ExcelPlugin}} converts any
+   The {{{./apidocs/org/apache/any23/plugin/officescraper/ExcelPlugin.html}ExcelPlugin}} converts any
    <<Microsoft Excel>> <97-2007> document to <RDF>.
 
    <<TODO: add conversion schema.>>


[07/15] any23 git commit: Fix package naming in OpenIE TestCase

Posted by le...@apache.org.
Fix package naming in OpenIE TestCase


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/1bb96c4f
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/1bb96c4f
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/1bb96c4f

Branch: refs/heads/master
Commit: 1bb96c4f7ada59741e18bbd02a02842a2af4fdaa
Parents: 2f54725
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Feb 27 15:35:42 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Feb 27 15:35:42 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/any23/extractor/openie/OpenIEExtractor.java    | 2 +-
 .../src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/1bb96c4f/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
index 21f03c3..bef40de 100644
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
@@ -68,7 +68,7 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
     /**
      * default constructor
      */
-    OpenIEExtractor() {
+    public OpenIEExtractor() {
         // default constructor
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1bb96c4f/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
index 3561bdd..0ba03fd 100644
--- a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
+++ b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -24,6 +24,7 @@ import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractionResultImpl;
+import org.apache.any23.extractor.openie.OpenIEExtractor;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.util.StreamUtils;
 import org.apache.any23.writer.RDFXMLWriter;


[06/15] any23 git commit: ANY23-304 update package names and introduce Service Loading for OpenIE module

Posted by le...@apache.org.
ANY23-304 update package names and introduce Service Loading for OpenIE module


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/2f547250
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/2f547250
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/2f547250

Branch: refs/heads/master
Commit: 2f54725049f0cbc152e9e27045c0f06e93c24647
Parents: 01abf8f
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Feb 27 09:56:12 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Feb 27 09:56:12 2017 -0800

----------------------------------------------------------------------
 cli/pom.xml                                                     | 5 +++++
 .../java/org/apache/any23/extractor/openie/OpenIEExtractor.java | 2 +-
 .../apache/any23/extractor/openie/OpenIEExtractorFactory.java   | 2 +-
 .../services/org.apache.any23.extractor.ExtractorFactory        | 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/2f547250/cli/pom.xml
----------------------------------------------------------------------
diff --git a/cli/pom.xml b/cli/pom.xml
index 5acedfb..6af107e 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -33,6 +33,11 @@
   <dependencies>
     <dependency>
       <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-openie</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
       <artifactId>apache-any23-api</artifactId>
       <version>${project.version}</version>
     </dependency>

http://git-wip-us.apache.org/repos/asf/any23/blob/2f547250/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
index b8fda29..21f03c3 100644
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.any23.openie;
+package org.apache.any23.extractor.openie;
 
 import java.io.IOException;
 import java.util.List;

http://git-wip-us.apache.org/repos/asf/any23/blob/2f547250/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
index 4a1696a..31760d2 100644
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractorFactory.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.any23.openie;
+package org.apache.any23.extractor.openie;
 
 import java.util.Arrays;
 

http://git-wip-us.apache.org/repos/asf/any23/blob/2f547250/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
----------------------------------------------------------------------
diff --git a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
index 48754d9..4faf7ce 100644
--- a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
+++ b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -1 +1 @@
-org.apache.any23.openie.OpenIEExtractorFactory
\ No newline at end of file
+org.apache.any23.extractor.openie.OpenIEExtractorFactory
\ No newline at end of file


[02/15] any23 git commit: ANY23-304 Add extractor for OpenIE

Posted by le...@apache.org.
ANY23-304 Add extractor for OpenIE


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/2ecfbff1
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/2ecfbff1
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/2ecfbff1

Branch: refs/heads/master
Commit: 2ecfbff1dddaf57689b725feddba47c7921f726d
Parents: bc46c72
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Thu Feb 23 17:26:03 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Thu Feb 23 17:26:03 2017 -0800

----------------------------------------------------------------------
 .../configuration/DefaultConfiguration.java     |  23 +-
 .../DefaultModifiableConfiguration.java         |   4 +-
 .../java/org/apache/any23/vocab/Vocabulary.java |  26 +-
 .../resources/default-configuration.properties  |   4 +
 .../extractor/SingleDocumentExtraction.java     |   6 +-
 .../extractor/html/EmbeddedJSONLDExtractor.java |   4 +-
 .../any23/extractor/html/GeoExtractor.java      |   7 +-
 .../any23/extractor/html/TagSoupParser.java     |   2 -
 .../any23/extractor/xpath/XPathExtractor.java   |   3 +-
 .../any23/extractor/yaml/YAMLExtractor.java     |  58 +-
 .../java/org/apache/any23/rdf/RDFUtils.java     |  50 +-
 .../java/org/apache/any23/util/StreamUtils.java |  69 +-
 .../any23/extractor/yaml/YAMLExtractorTest.java |   1 -
 openie/pom.xml                                  | 154 +++++
 .../apache/any23/openie/OpenIEExtractor.java    | 129 ++++
 .../any23/openie/OpenIEExtractorFactory.java    |  52 ++
 .../any23/openie/OpenIEExtractorTest.java       |  87 +++
 pom.xml                                         |   1 +
 .../any23/extractor/openie/example-openie.html  | 638 +++++++++++++++++++
 19 files changed, 1230 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java b/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
index 6edaf34..170548e 100644
--- a/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
+++ b/api/src/main/java/org/apache/any23/configuration/DefaultConfiguration.java
@@ -48,6 +48,14 @@ public class DefaultConfiguration implements Configuration {
 
     protected final Properties properties;
 
+    protected DefaultConfiguration(Properties properties) {
+        this.properties = properties;
+    }
+
+    private DefaultConfiguration() {
+        this( loadDefaultProperties() );
+    }
+
     /**
      * @return the singleton configuration instance.
      *         Such instance is unmodifiable.
@@ -74,22 +82,17 @@ public class DefaultConfiguration implements Configuration {
         return properties;
     }
 
-    protected DefaultConfiguration(Properties properties) {
-        this.properties = properties;
-    }
-
-    private DefaultConfiguration() {
-        this( loadDefaultProperties() );
-    }
-
+    @Override
     public synchronized String[] getProperties() {
         return properties.keySet().toArray( new String[properties.size()] );
     }
 
+    @Override
     public synchronized boolean defineProperty(String propertyName) {
         return properties.containsKey(propertyName);
     }
 
+    @Override
     public synchronized String getProperty(String propertyName, String defaultValue) {
         final String value = getPropertyValue(propertyName);
         if(value == null) {
@@ -98,6 +101,7 @@ public class DefaultConfiguration implements Configuration {
         return value;
     }
 
+    @Override
     public synchronized String getPropertyOrFail(String propertyName) {
         final String propertyValue = getPropertyValue(propertyName);
         if(propertyValue == null) {
@@ -111,6 +115,7 @@ public class DefaultConfiguration implements Configuration {
         return propertyValue;
     }
 
+    @Override
     public synchronized int getPropertyIntOrFail(String propertyName) {
         final String value = getPropertyOrFail(propertyName);
         final String trimValue = value.trim();
@@ -121,6 +126,7 @@ public class DefaultConfiguration implements Configuration {
         }
     }
 
+    @Override
     public synchronized boolean getFlagProperty(final String propertyName) {
         final String value = getPropertyOrFail(propertyName);
         if(value == null) {
@@ -140,6 +146,7 @@ public class DefaultConfiguration implements Configuration {
         );
     }
 
+    @Override
     public synchronized String getConfigurationDump() {
         final String[] defaultProperties = getProperties();
         final StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java b/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
index 82ceaad..055d39c 100644
--- a/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
+++ b/api/src/main/java/org/apache/any23/configuration/DefaultModifiableConfiguration.java
@@ -30,8 +30,10 @@ public class DefaultModifiableConfiguration extends DefaultConfiguration impleme
         super(properties);
     }
 
+    @Override
     public synchronized String setProperty(String propertyName, String propertyValue) {
-        if( ! defineProperty(propertyName) ) throw new IllegalArgumentException(
+        if( ! defineProperty(propertyName) )
+            throw new IllegalArgumentException(
                 String.format("Property '%s' is not defined in configuration.", propertyName)
         );
         return (String) properties.setProperty(propertyName, propertyValue);

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/Vocabulary.java b/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
index 8c8204f..718f514 100644
--- a/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
+++ b/api/src/main/java/org/apache/any23/vocab/Vocabulary.java
@@ -157,8 +157,8 @@ public abstract class Vocabulary {
         if(classes == null) {
             return new IRI[0];
         }
-        final Collection<IRI> IRIs = classes.values();
-        return IRIs.toArray( new IRI[ IRIs.size() ] );
+        final Collection<IRI> iris = classes.values();
+        return iris.toArray( new IRI[ iris.size() ] );
     }
 
     /**
@@ -168,8 +168,8 @@ public abstract class Vocabulary {
         if(properties == null) {
             return new IRI[0];
         }
-        final Collection<IRI> IRIs = properties.values();
-        return IRIs.toArray( new IRI[ IRIs.size() ] );
+        final Collection<IRI> iris = properties.values();
+        return iris.toArray( new IRI[ iris.size() ] );
     }
 
     /**
@@ -197,11 +197,11 @@ public abstract class Vocabulary {
     /**
      * Creates a IRI.
      *
-     * @param IRIStr the IRI string
+     * @param iriStr the IRI string
      * @return the IRI instance.
      */
-    protected IRI createIRI(String IRIStr) {
-        return SimpleValueFactory.getInstance().createIRI(IRIStr);
+    protected IRI createIRI(String iriStr) {
+        return SimpleValueFactory.getInstance().createIRI(iriStr);
     }
 
     /**
@@ -214,7 +214,7 @@ public abstract class Vocabulary {
     protected IRI createClass(String namespace, String resource) {
         IRI res = createIRI(namespace, resource);
         if(classes == null) {
-            classes = new HashMap<String, IRI>(10);
+            classes = new HashMap<>(10);
         }
         classes.put(resource, res);
         return res;
@@ -230,7 +230,7 @@ public abstract class Vocabulary {
     protected IRI createProperty(String namespace, String property) {
         IRI res = createIRI(namespace, property);
         if(properties == null) {
-            properties = new HashMap<String, IRI>(10);
+            properties = new HashMap<>(10);
         }
         properties.put(property, res);
         return res;
@@ -248,14 +248,16 @@ public abstract class Vocabulary {
     }
 
     private void fillResourceToCommentMap() {
-        if(resourceToCommentMap != null) return;
-        final Map<IRI,String> newMap = new HashMap<IRI, String>();
+        if(resourceToCommentMap != null)
+            return;
+        final Map<IRI,String> newMap = new HashMap<>();
         for (Field field : this.getClass().getFields()) {
             try {
                 final Object value = field.get(this);
                 if(value instanceof IRI) {
                     final Comment comment = field.getAnnotation(Comment.class);
-                    if(comment != null) newMap.put((IRI) value, comment.value());
+                    if(comment != null)
+                        newMap.put((IRI) value, comment.value());
                 }
             } catch (IllegalAccessException iae) {
                 throw new RuntimeException("Error while creating resource to comment map.", iae);

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/api/src/main/resources/default-configuration.properties
----------------------------------------------------------------------
diff --git a/api/src/main/resources/default-configuration.properties b/api/src/main/resources/default-configuration.properties
index d047a83..4f68586 100644
--- a/api/src/main/resources/default-configuration.properties
+++ b/api/src/main/resources/default-configuration.properties
@@ -72,3 +72,7 @@ any23.extraction.head.meta=on
 # Allows to specify a CSV file separator and comment delimeter
 any23.extraction.csv.field=,
 any23.extraction.csv.comment=#
+
+# A confidence threshold for the OpenIE extractions
+# Any extractions below this value will not be processed.
+any23.extraction.openie.confidence.threshold=0.5

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
index d88edf7..295f4e9 100644
--- a/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
+++ b/core/src/main/java/org/apache/any23/extractor/SingleDocumentExtraction.java
@@ -231,10 +231,10 @@ public class SingleDocumentExtraction {
             log.debug(sb.toString());
         }
 
-        final List<ResourceRoot> resourceRoots = new ArrayList<ResourceRoot>();
-        final List<PropertyPath> propertyPaths = new ArrayList<PropertyPath>();
+        final List<ResourceRoot> resourceRoots = new ArrayList<>();
+        final List<PropertyPath> propertyPaths = new ArrayList<>();
         final Map<String,Collection<IssueReport.Issue>> extractorToIssues =
-            new HashMap<String,Collection<IssueReport.Issue>>();
+            new HashMap<>();
         
         // Invoke all extractors.
         try {

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index 818fc98..db58586 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -56,7 +56,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
 
 	private IRI profile;
 
-	private Map<String, IRI> prefixes = new HashMap<String, IRI>();
+	private Map<String, IRI> prefixes = new HashMap<>();
 
 	private String documentLang;
 
@@ -137,7 +137,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
 			ExtractionContext extractionContext, ExtractionResult out)
 			throws IOException, ExtractionException {
 		List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
-		Set<JSONLDScript> result = new HashSet<JSONLDScript>();
+		Set<JSONLDScript> result = new HashSet<>();
 		extractor = new JSONLDExtractorFactory().createExtractor();
 		for (Node jsonldNode : scriptNodes) {
 			NamedNodeMap attributes = jsonldNode.getAttributes();

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
index d85af79..ed7e5d3 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
@@ -50,7 +50,8 @@ public class GeoExtractor extends EntityBasedMicroformatExtractor {
     }
 
     protected boolean extractEntity(Node node, ExtractionResult out) {
-        if (null == node) return false;
+        if (null == node)
+            return false;
         //try lat & lon
         final HTMLDocument document = new HTMLDocument(node);
         HTMLDocument.TextField latNode = document.getSingularTextField("latitude" );
@@ -59,13 +60,13 @@ public class GeoExtractor extends EntityBasedMicroformatExtractor {
         String lon = lonNode.value();
         if ("".equals(lat) || "".equals(lon)) {
             String[] both = document.getSingularUrlField("geo").value().split(";");
-            if (both.length != 2) return false;
+            if (both.length != 2)
+                return false;
             lat = both[0];
             lon = both[1];
         }
         BNode geo = getBlankNodeFor(node);
         out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
-        final String extractorName = getDescription().getExtractorName();
         conditionallyAddStringProperty(
                 latNode.source(),
                 geo, vVCARD.latitude , lat

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
index e6eb9cd..9ef72f4 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java
@@ -25,8 +25,6 @@ import org.apache.xerces.xni.QName;
 import org.apache.xerces.xni.XMLAttributes;
 import org.apache.xerces.xni.XNIException;
 import org.cyberneko.html.parsers.DOMParser;
-import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java b/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java
index b04533c..1fe1b02 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java
@@ -39,9 +39,10 @@ import java.util.List;
  */
 public class XPathExtractor implements Extractor.TagSoupDOMExtractor {
 
-    private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<XPathExtractionRule>();
+    private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<>();
 
     public XPathExtractor() {
+        //default constructor
     }
     
     public XPathExtractor(List<XPathExtractionRule> rules) {

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
index 64548f1..19bccd1 100644
--- a/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/YAMLExtractor.java
@@ -17,8 +17,6 @@ package org.apache.any23.extractor.yaml;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -29,9 +27,7 @@ import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.Extractor;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.util.StringUtils;
 import org.apache.any23.vocab.YAML;
-import org.apache.commons.lang.WordUtils;
 import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.IRI;
 import org.eclipse.rdf4j.model.Value;
@@ -64,10 +60,10 @@ public class YAMLExtractor implements Extractor.ContentExtractor {
     public void run(ExtractionParameters extractionParameters, ExtractionContext context, InputStream in,
             ExtractionResult out)
             throws IOException, ExtractionException {
-        IRI documentURI = context.getDocumentIRI();
-        documentRoot = RDFUtils.uri(documentURI.toString() + "root");
+        IRI documentIRI = context.getDocumentIRI();
+        documentRoot = RDFUtils.iri(documentIRI.toString() + "root");
 
-        log.debug("process: {}", documentURI.toString());
+        log.debug("Processing: {}", documentIRI.toString());
         out.writeNamespace(vocab.PREFIX, vocab.NS);
         out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
         out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
@@ -77,10 +73,10 @@ public class YAMLExtractor implements Extractor.ContentExtractor {
 
         // Iterate over page(s)
         for (Object p : docIterate) {
-            Resource pageNode = YAMLExtractor.this.makeUri("document", documentURI);
+            Resource pageNode = RDFUtils.makeIRI("document", documentIRI, true);
             out.writeTriple(documentRoot, vocab.contains, pageNode);
             out.writeTriple(pageNode, RDF.TYPE, vocab.document);
-            out.writeTriple(pageNode, vocab.contains, buildNode(documentURI, p, out));
+            out.writeTriple(pageNode, vocab.contains, buildNode(documentIRI, p, out));
         }
 
     }
@@ -99,9 +95,9 @@ public class YAMLExtractor implements Extractor.ContentExtractor {
         if (treeData == null) {
             return RDF.NIL;
         } else if (treeData instanceof Map) {
-            return processMap(fileURI, (Map) treeData, out);
+            return processMap(fileURI, (Map<String, Object>) treeData, out);
         } else if (treeData instanceof List) {
-            return processList(fileURI, (List) treeData, out);
+            return processList(fileURI, (List<?>) treeData, out);
         } else if (treeData instanceof Long) {
             return RDFUtils.literal(((Long) treeData));
         } else if (treeData instanceof Integer) {
@@ -120,9 +116,9 @@ public class YAMLExtractor implements Extractor.ContentExtractor {
     }
 
     private Value processMap(IRI file, Map<String, Object> node, ExtractionResult out) {
-        Resource nodeURI = YAMLExtractor.this.makeUri(file);
+        Resource nodeURI = RDFUtils.makeIRI(file);
         for (String k : node.keySet()) {
-            Resource predicate = makeUri(k, file, false);
+            Resource predicate = RDFUtils.makeIRI(k, file, true);
             Value value = buildNode(file, node.get(k), out);
             out.writeTriple(nodeURI, RDF.TYPE, vocab.node);
             out.writeTriple(nodeURI, (IRI) predicate, value);
@@ -132,13 +128,13 @@ public class YAMLExtractor implements Extractor.ContentExtractor {
         return nodeURI;
     }
 
-    private Value processList(IRI fileURI, Iterable iter, ExtractionResult out) {
+    private Value processList(IRI fileURI, Iterable<?> iter, ExtractionResult out) {
         Resource node = YAMLExtractor.this.makeUri();
         out.writeTriple(node, RDF.TYPE, RDF.LIST);
 
         Resource pList = null; // previous RDF iter node
         Resource cList = node; // cutternt RDF iter node
-        Iterator listIter = iter.iterator();
+        Iterator<?> listIter = iter.iterator();
         while (listIter.hasNext()) {
             // If previous RDF iter node is given lint with current one
             if (pList != null) {
@@ -161,36 +157,4 @@ public class YAMLExtractor implements Extractor.ContentExtractor {
         nodeId++;
         return bnode;
     }
-
-    private Resource makeUri(IRI docUri) {
-        return makeUri("node", docUri);
-    }
-
-    private Resource makeUri(String type, IRI docUri) {
-        return makeUri(type, docUri, true);
-    }
-
-    private Resource makeUri(String type, IRI docUri, boolean addId) {
-
-        // preprocess string: converts - -> _
-        //                    converts <space>: word1 word2 -> word1Word2
-        String newType = StringUtils.implementJavaNaming(type);
-
-        String uriString;
-        if (docUri.toString().endsWith("/")) {
-            uriString = docUri.toString() + newType;
-        } else {
-            uriString = docUri.toString() + "#" + newType;
-        }
-
-        if (addId) {
-            uriString = uriString + "_" + Integer.toString(nodeId);
-        }
-
-        Resource node = RDFUtils.uri(uriString);
-        if (addId) {
-            nodeId++;
-        }
-        return node;
-    }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
index bbfe5ec..f6e3a8c 100644
--- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
+++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
@@ -18,7 +18,9 @@
 package org.apache.any23.rdf;
 
 import org.apache.any23.util.MathUtils;
+import org.apache.any23.util.StringUtils;
 import org.eclipse.rdf4j.model.BNode;
+import org.eclipse.rdf4j.model.IRI;
 import org.eclipse.rdf4j.model.Literal;
 import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.Statement;
@@ -60,6 +62,8 @@ import java.util.Optional;
  */
 public class RDFUtils {
 
+    private static int nodeId = 0;
+
     private static final ValueFactory valueFactory = SimpleValueFactory.getInstance();
 
     /**
@@ -71,7 +75,8 @@ public class RDFUtils {
      */
     public static String fixAbsoluteIRI(String uri) {
         String fixed = fixIRIWithException(uri);
-        if (!fixed.matches("[a-zA-Z0-9]+:/.*")) throw new IllegalArgumentException("not a absolute org.eclipse.rdf4j.model.IRI: " + uri);
+        if (!fixed.matches("[a-zA-Z0-9]+:/.*"))
+            throw new IllegalArgumentException("not a absolute org.eclipse.rdf4j.model.IRI: " + uri);
         // Add trailing slash if org.eclipse.rdf4j.model.IRI has only authority but no path.
         if (fixed.matches("https?://[a-zA-Z0-9.-]+(:[0-9+])?")) {
             fixed = fixed + "/";
@@ -129,7 +134,8 @@ public class RDFUtils {
      * @return the unescaped string.
      */
     public static String fixIRIWithException(String unescapedIRI) {
-        if (unescapedIRI == null) throw new IllegalArgumentException("org.eclipse.rdf4j.model.IRI was null");
+        if (unescapedIRI == null)
+            throw new IllegalArgumentException("org.eclipse.rdf4j.model.IRI was null");
 
         //    Remove starting and ending whitespace
         String escapedIRI = unescapedIRI.trim();
@@ -141,7 +147,8 @@ public class RDFUtils {
         escapedIRI = escapedIRI.replaceAll("\n", "");
 
         //'Remove starting  "\" or '"'
-        if (escapedIRI.startsWith("\\") || escapedIRI.startsWith("\"")) escapedIRI = escapedIRI.substring(1);
+        if (escapedIRI.startsWith("\\") || escapedIRI.startsWith("\""))
+            escapedIRI = escapedIRI.substring(1);
         //Remove  ending   "\" or '"'
         if (escapedIRI.endsWith("\\") || escapedIRI.endsWith("\""))
             escapedIRI = escapedIRI.substring(0, escapedIRI.length() - 1);
@@ -406,7 +413,8 @@ public class RDFUtils {
      * @return a value instance.
      */
     public static Value toValue(String s) {
-        if ("a".equals(s)) return RDF.TYPE;
+        if ("a".equals(s))
+            return RDF.TYPE;
         if (s.matches("[a-z0-9]+:.*")) {
             return PopularPrefixes.get().expand(s);
         }
@@ -466,7 +474,8 @@ public class RDFUtils {
      * @throws IllegalArgumentException if no extension matches.
      */
     public static Optional<RDFFormat> getFormatByExtension(String ext) {
-        if( ! ext.startsWith(".") ) ext = "." + ext;
+        if( ! ext.startsWith(".") )
+            ext = "." + ext;
         return Rio.getParserFormatForFileName(ext);
     }
 
@@ -564,6 +573,37 @@ public class RDFUtils {
         }
     }
 
+    /** Builds a uniquely numbered <code>node</code> IRI under <code>docUri</code>. */
+    public static Resource makeIRI(IRI docUri) {
+        // A fixed "node" IRI would merge all nodes of a document into a single
+        // resource, so the numeric id is always appended here.
+        return makeIRI("node", docUri, true);
+    }
+
+    /** Builds an IRI for <code>type</code> under <code>docIRI</code> without a numeric suffix. */
+    public static Resource makeIRI(String type, IRI docIRI) {
+        return makeIRI(type, docIRI, false);
+    }
+
+    /**
+     * Builds an IRI by appending a Java-style rendering of <code>type</code> to <code>docIRI</code>.
+     * @param type label normalised through <code>StringUtils.implementJavaNaming</code>.
+     * @param docIRI base IRI; a '#' separator is inserted unless it already ends with '/'.
+     * @param addId if <code>true</code> appends '_' plus the class-wide counter and advances it.
+     * @return the generated IRI.
+     */
+    public static Resource makeIRI(String type, IRI docIRI, boolean addId) {
+        String base = docIRI.toString();
+        String iriString = base + (base.endsWith("/") ? "" : "#") + StringUtils.implementJavaNaming(type);
+        if (addId) {
+            iriString = iriString + "_" + Integer.toString(nodeId);
+        }
+        Resource node = RDFUtils.iri(iriString);
+        if (addId) {
+            nodeId++; // counter is a plain static int and is not synchronized here
+        }
+        return node;
+    }
     private RDFUtils() {}
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/main/java/org/apache/any23/util/StreamUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/util/StreamUtils.java b/core/src/main/java/org/apache/any23/util/StreamUtils.java
index 2022f0e..a456655 100644
--- a/core/src/main/java/org/apache/any23/util/StreamUtils.java
+++ b/core/src/main/java/org/apache/any23/util/StreamUtils.java
@@ -17,10 +17,17 @@
 
 package org.apache.any23.util;
 
+import org.apache.commons.io.ByteOrderMark;
+import org.apache.commons.io.input.BOMInputStream;
+import org.apache.xerces.impl.io.MalformedByteSequenceException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
 
 import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.InputStream;
@@ -28,6 +35,18 @@ import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.List;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
 /**
  * Contains general utility functions for handling streams.
  *
@@ -93,9 +112,9 @@ public class StreamUtils {
      * @return the string content.
      * @throws IOException if an error occurs while consuming the <code>is</code> stream.
      */
-     public static String asString(InputStream is) throws IOException {
-         return asString(is, false);
-     }
+    public static String asString(InputStream is) throws IOException {
+        return asString(is, false);
+    }
 
     /**
      * Closes the closable interface and reports error if any.
@@ -112,4 +131,48 @@ public class StreamUtils {
         }
     }
 
+    /**
+     * Serializes a {@link org.w3c.dom.Document} into an in-memory {@link java.io.InputStream}.
+     * A failed transformation is logged; bytes written before the failure are still returned.
+     * @throws TransformerFactoryConfigurationError if no transformer factory can be instantiated.
+     * @throws TransformerConfigurationException if a transformer cannot be created.
+     */
+    public static InputStream documentToInputStream(Document doc) 
+            throws TransformerConfigurationException, TransformerFactoryConfigurationError {
+        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+        Source xmlSource = new DOMSource(doc);
+        Result outputTarget = new StreamResult(outputStream);
+        try {
+            TransformerFactory.newInstance().newTransformer().transform(xmlSource, outputTarget);
+        } catch (TransformerException e) {
+            logger.error("Error during transformation.", e);
+        }
+        return new ByteArrayInputStream(outputStream.toByteArray());
+    }
+
+    /**
+     * Parses an {@link java.io.InputStream} into a {@link org.w3c.dom.Document},
+     * excluding a leading UTF-8, UTF-16 or UTF-32 byte order mark if one is present.
+     * @return the parsed document, or <code>null</code> if it could not be parsed (the error is logged).
+     */
+    public static Document inputStreamToDocument(InputStream is) throws MalformedByteSequenceException {
+        DocumentBuilder builder;
+        try {
+            builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+        } catch (ParserConfigurationException e) {
+            logger.error("Error converting InputStream to Document.", e);
+            return null; // without a builder the parse below would throw a NullPointerException
+        }
+
+        try {
+            // BOMInputStream strips a detected BOM by itself; an additional read()
+            // would discard the first byte of the real content.
+            BOMInputStream bomIn = new BOMInputStream(is, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE,
+                    ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_32BE, ByteOrderMark.UTF_32LE);
+            return builder.parse(bomIn);
+        } catch (SAXException | IOException e) {
+            logger.error("Error converting InputStream to Document.", e);
+            return null;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
index 0cf8d14..f2c85ba 100644
--- a/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/yaml/YAMLExtractorTest.java
@@ -27,7 +27,6 @@ import org.eclipse.rdf4j.model.Statement;
 import org.eclipse.rdf4j.model.vocabulary.RDF;
 import org.eclipse.rdf4j.model.vocabulary.RDFS;
 import org.eclipse.rdf4j.repository.RepositoryResult;
-import org.semarglproject.vocab.XSD;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/openie/pom.xml
----------------------------------------------------------------------
diff --git a/openie/pom.xml b/openie/pom.xml
new file mode 100644
index 0000000..799684d
--- /dev/null
+++ b/openie/pom.xml
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>apache-any23</artifactId>
+    <groupId>org.apache.any23</groupId>
+    <version>2.1-SNAPSHOT</version>
+    <relativePath></relativePath>
+  </parent>
+
+  <!-- Presumably the repository serving the org.allenai.openie artifacts declared
+       below (confirm). Accessed over HTTPS so downloads cannot be altered in transit. -->
+  <repositories>
+    <repository>
+      <snapshots><enabled>false</enabled></snapshots>
+      <id>bintray-allenai-maven</id>
+      <name>bintray</name>
+      <url>https://allenai.bintray.com/maven</url>
+    </repository>
+  </repositories>
+  <!-- The same location declared for plugin resolution; snapshots are disabled
+       here as well. -->
+  <pluginRepositories>
+    <pluginRepository>
+      <snapshots><enabled>false</enabled></snapshots>
+      <id>bintray-allenai-maven</id>
+      <name>bintray-plugins</name>
+      <url>https://allenai.bintray.com/maven</url>
+    </pluginRepository>
+  </pluginRepositories>
+
+  <artifactId>apache-any23-openie</artifactId>
+
+  <name>Apache Any23 :: OpenIE</name>
+  <description>Open Information Extraction module.</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-test-resources</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>4.2.6</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.allenai.openie</groupId>
+      <artifactId>openie_2.11</artifactId>
+      <version>4.2.6</version>
+      <scope>compile</scope>
+      <type>pom</type>
+    </dependency>
+    <dependency>
+      <groupId>edu.washington.cs.knowitall</groupId>
+      <artifactId>openregex</artifactId>
+      <version>1.1.1</version>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>${basedir}/../</directory>
+        <targetPath>META-INF</targetPath>
+        <includes>
+          <include>LICENSE.txt</include>
+          <include>NOTICE.txt</include>
+        </includes>
+      </resource>
+    </resources>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-assembly-plugin</artifactId>
+          <version>${maven-assembly-plugin.version}</version>
+          <executions>
+            <execution>
+              <id>assembly</id>
+              <phase>package</phase>
+              <goals>
+                <goal>single</goal>
+              </goals>
+            </execution>
+          </executions>
+          <configuration>
+            <attach>true</attach>
+            <skipAssembly>true</skipAssembly>
+            <tarLongFileMode>gnu</tarLongFileMode>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+
+  </profiles>
+
+</project>

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java
new file mode 100644
index 0000000..b8fda29
--- /dev/null
+++ b/openie/src/main/java/org/apache/any23/openie/OpenIEExtractor.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.configuration.Configuration;
+import org.apache.any23.configuration.DefaultConfiguration;
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+
+import edu.knowitall.openie.Argument;
+import edu.knowitall.openie.Instance;
+import edu.knowitall.openie.OpenIE;
+import edu.knowitall.tool.parse.ClearParser;
+import edu.knowitall.tool.postag.ClearPostagger;
+import edu.knowitall.tool.srl.ClearSrl;
+import edu.knowitall.tool.tokenize.ClearTokenizer;
+import scala.collection.JavaConversions;
+import scala.collection.Seq;
+
+/**
+ * An <a href="https://github.com/allenai/openie-standalone">OpenIE</a> 
+ * extractor able to generate <i>RDF</i> statements from 
+ * sentences representing relations in the text.
+ */
+public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
+
+    private static final Logger LOG = LoggerFactory.getLogger(OpenIEExtractor.class);
+
+    /**
+     * default constructor
+     */
+    OpenIEExtractor() {
+        // default constructor
+    }
+
+    /**
+     * @see org.apache.any23.extractor.Extractor#getDescription()
+     */
+    @Override
+    public ExtractorDescription getDescription() {
+        return OpenIEExtractorFactory.getDescriptionInstance();
+    }
+
+    /**
+     * Turns the document into plain text with Tika, runs OpenIE over it and, for every
+     * extraction whose confidence exceeds the configured threshold (default 0.5), writes one
+     * triple per object argument (arg1 as subject, relation as predicate, each arg2 as object).
+     * If the text cannot be produced, the error is logged and no triples are written.
+     * @param extractionParameters not read by this extractor.
+     * @param context provides the document IRI used as base for generated IRIs.
+     * @param in the DOM to analyse.
+     * @param out receives the namespaces and the extracted triples.
+     */
+    @Override
+    public void run(ExtractionParameters extractionParameters,
+            ExtractionContext context, Document in, ExtractionResult out)
+                    throws IOException, ExtractionException {
+        IRI documentIRI = context.getDocumentIRI();
+        out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
+        out.writeNamespace(RDFS.PREFIX, RDFS.NAMESPACE);
+        LOG.debug("Processing: {}", documentIRI.toString());
+
+        OpenIE openIE = new OpenIE(
+                new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false);
+
+        Seq<Instance> extractions;
+        try {
+            extractions = openIE.extract(new Tika().parseToString(StreamUtils.documentToInputStream(in)));
+        } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
+            LOG.error("Encountered error during OpenIE extraction.", e);
+            return; // no text to analyse; continuing would dereference a null Seq
+        } catch (TikaException e) {
+            LOG.error("Encountered error whilst parsing InputStream with Tika.", e);
+            return;
+        }
+
+        final Configuration immutableConf = DefaultConfiguration.singleton();
+        final double threshold = Double.parseDouble(
+                immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"));
+        for (Instance instance : JavaConversions.seqAsJavaList(extractions)) {
+            if (instance.confidence() > threshold) {
+                Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);
+                IRI predicate = (IRI) RDFUtils.makeIRI(instance.extr().rel().text(), documentIRI);
+                List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
+                for (Argument argument : listArg2s) {
+                    Value object = RDFUtils.toValue(argument.text());
+                    out.writeTriple(subject, predicate, object);
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java b/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java
new file mode 100644
index 0000000..4a1696a
--- /dev/null
+++ b/openie/src/main/java/org/apache/any23/openie/OpenIEExtractorFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * Factory for {@link OpenIEExtractor}; the factory also acts as its {@link ExtractorDescription}.
+ * @author lewismc
+ */
+public class OpenIEExtractorFactory extends SimpleExtractorFactory<OpenIEExtractor>
+    implements ExtractorFactory<OpenIEExtractor> {
+    /** Extractor name handed to the superclass constructor. */
+    public static final String NAME = "openie";
+    /** No prefixes are supplied for this extractor. */
+    public static final Prefixes prefixes = null;
+    /** Single shared instance returned by {@link #getDescriptionInstance()}. */
+    private static final ExtractorDescription descriptionInstance = new OpenIEExtractorFactory();
+
+    public OpenIEExtractorFactory() {
+        super(NAME, prefixes, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), "example-openie.html");
+    }
+
+    @Override
+    public OpenIEExtractor createExtractor() {
+        return new OpenIEExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
new file mode 100644
index 0000000..3561bdd
--- /dev/null
+++ b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.openie;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractionResultImpl;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.StreamUtils;
+import org.apache.any23.writer.RDFXMLWriter;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.eclipse.rdf4j.model.IRI;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Runs {@link OpenIEExtractor} over a sample HTML page and logs the RDF/XML output.
+ * @author lewismc
+ */
+public class OpenIEExtractorTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(OpenIEExtractorTest.class);
+
+    private OpenIEExtractor extractor;
+
+    @Before
+    public void setUp() throws Exception {
+        extractor = new OpenIEExtractor();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        extractor = null;
+    }
+
+    //@Ignore("This typically results in a JVM crash... disabled for the time being.")
+    @Test
+    public void testExtractFromHTMLDocument() 
+      throws IOException, ExtractionException, TripleHandlerException {
+        final IRI uri = RDFUtils.iri("http://podaac.jpl.nasa.gov/aquarius");
+        extract(uri, "/org/apache/any23/extractor/openie/example-openie.html");
+    }
+
+    public void extract(IRI uri, String filePath) 
+      throws IOException, ExtractionException, TripleHandlerException {
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      final TripleHandler tHandler = new RDFXMLWriter(baos);
+      final ExtractionContext extractionContext = new ExtractionContext("rdf-openie", uri);
+      final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
+      try {
+        extractor.run(
+                ExtractionParameters.newDefault(), extractionContext,
+                StreamUtils.inputStreamToDocument(this.getClass().getResourceAsStream(filePath)),
+                result
+        );
+      } finally {
+        // Close the result before the handler it writes to, and log only after both
+        // are closed so the buffer holds the complete serialisation.
+        result.close();
+        tHandler.close();
+        logger.debug(baos.toString());
+      }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/2ecfbff1/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 23ab57f..fffc7b5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -204,6 +204,7 @@
     <module>encoding</module>
     <module>core</module>
     <module>cli</module>
+    <module>openie</module>
     <module>plugins/basic-crawler</module>
     <module>plugins/html-scraper</module>
     <module>plugins/office-scraper</module>


[12/15] any23 git commit: ANY23-304 increase number of extractors found

Posted by le...@apache.org.
ANY23-304 increase number of extractors found


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6d5c39e5
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6d5c39e5
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6d5c39e5

Branch: refs/heads/master
Commit: 6d5c39e57b5e8a4dd29da27e3137c396dd1ffbd9
Parents: d4008bc
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jul 26 14:19:37 2017 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jul 26 14:19:37 2017 -0700

----------------------------------------------------------------------
 .../src/test/java/org/apache/any23/plugin/PluginIT.java            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/6d5c39e5/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
----------------------------------------------------------------------
diff --git a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
index 4347c8b..1abeb2b 100644
--- a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
+++ b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
@@ -41,7 +41,7 @@ import static org.junit.Assert.assertTrue;
  */
 public class PluginIT {
 
-    private static final int NUM_OF_EXTRACTORS = 32;
+    private static final int NUM_OF_EXTRACTORS = 34;
 
     private static final String PLUGIN_DIR = "target/plugins-build/";
 


[08/15] any23 git commit: Fix ClassLoading issues and test issues introduced with ANY23-274

Posted by le...@apache.org.
Fix ClassLoading issues and test issues introduced with ANY23-274


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/89d1d85e
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/89d1d85e
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/89d1d85e

Branch: refs/heads/master
Commit: 89d1d85e263ef91f4c696a56a87eb15c2892349b
Parents: 1bb96c4
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Feb 27 20:56:29 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Feb 27 20:56:29 2017 -0800

----------------------------------------------------------------------
 .../apache/any23/extractor/ExtractorGroup.java  | 14 +++++----
 cli/pom.xml                                     | 28 ++++++++++++++----
 .../org/apache/any23/cli/PluginVerifier.java    |  8 +++---
 .../main/java/org/apache/any23/cli/Rover.java   | 10 ++++---
 .../java/org/apache/any23/cli/ToolRunner.java   |  7 +++--
 .../any23/extractor/ExtractorRegistryImpl.java  | 30 ++------------------
 .../any23/extractor/ExtractionAPITest.java      |  4 +--
 .../extractor/ExtractionResultImplTest.java     |  2 +-
 openie/pom.xml                                  | 10 -------
 plugins/basic-crawler/pom.xml                   | 19 -------------
 plugins/html-scraper/pom.xml                    | 19 -------------
 plugins/integration-test/pom.xml                | 16 ++++-------
 .../java/org/apache/any23/plugin/PluginIT.java  | 22 +++++++-------
 plugins/office-scraper/pom.xml                  | 19 -------------
 pom.xml                                         | 18 ++++++++++--
 15 files changed, 83 insertions(+), 143 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java b/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
index 4eeaa12..9242ea6 100644
--- a/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
+++ b/api/src/main/java/org/apache/any23/extractor/ExtractorGroup.java
@@ -52,7 +52,7 @@ public class ExtractorGroup implements Iterable<ExtractorFactory<?>> {
      */
     public ExtractorGroup filterByMIMEType(MIMEType mimeType) {
         // @@@ wildcards, q values
-        Collection<ExtractorFactory<?>> matching = new ArrayList<ExtractorFactory<?>>();
+        Collection<ExtractorFactory<?>> matching = new ArrayList<>();
         for (ExtractorFactory<?> factory : factories) {
             if (supportsAllContentTypes(factory) || supports(factory, mimeType)) {
                 matching.add(factory);
@@ -71,7 +71,8 @@ public class ExtractorGroup implements Iterable<ExtractorFactory<?>> {
      */
     public boolean allExtractorsSupportAllContentTypes() {
         for (ExtractorFactory<?> factory : factories) {
-            if (!supportsAllContentTypes(factory)) return false;
+            if (!supportsAllContentTypes(factory))
+                return false;
         }
         return true;
     }
@@ -82,9 +83,12 @@ public class ExtractorGroup implements Iterable<ExtractorFactory<?>> {
 
     private boolean supports(ExtractorFactory<?> factory, MIMEType mimeType) {
         for (MIMEType supported : factory.getSupportedMIMETypes()) {
-            if (supported.isAnyMajorType()) return true;
-            if (supported.isAnySubtype() && supported.getMajorType().equals(mimeType.getMajorType())) return true;
-            if (supported.getFullType().equals(mimeType.getFullType())) return true;
+            if (supported.isAnyMajorType())
+                return true;
+            if (supported.isAnySubtype() && supported.getMajorType().equals(mimeType.getMajorType()))
+                return true;
+            if (supported.getFullType().equals(mimeType.getFullType()))
+                return true;
         }
         return false;
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/cli/pom.xml
----------------------------------------------------------------------
diff --git a/cli/pom.xml b/cli/pom.xml
index 6af107e..3f183ae 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -33,11 +33,6 @@
   <dependencies>
     <dependency>
       <groupId>${project.groupId}</groupId>
-      <artifactId>apache-any23-openie</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
       <artifactId>apache-any23-api</artifactId>
       <version>${project.version}</version>
     </dependency>
@@ -76,6 +71,26 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-openie</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <!-- dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-basic-crawler</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-office-scraper</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-html-scraper</artifactId>
+      <version>${project.version}</version>
+    </dependency-->
+    <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>
     </dependency>
@@ -217,6 +232,9 @@
             <program>
               <mainClass>org.apache.any23.cli.ToolRunner</mainClass>
               <name>any23</name>
+              <jvmSettings>
+                <maxMemorySize>6000m</maxMemorySize>
+              </jvmSettings>
             </program>
           </programs>
           <configurationDirectory>conf</configurationDirectory>

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
index a747b49..f3f3234 100644
--- a/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
+++ b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
@@ -46,7 +46,7 @@ public class PluginVerifier implements Tool {
         description = "plugins-dir",
         converter = FileConverter.class
     )
-    private List<File> pluginsDirs = new LinkedList<File>();
+    private List<File> pluginsDirs = new LinkedList<>();
 
     public void run() throws Exception {
         if (pluginsDirs.isEmpty()) {
@@ -78,9 +78,9 @@ public class PluginVerifier implements Tool {
 
     private void printPluginData(ExtractorFactory extractorFactory, PrintStream ps) {
         final Author authorAnnotation = extractorFactory.getClass().getAnnotation(Author.class);
-        ps.printf("Plugin author    : %s\n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
-        ps.printf("Plugin factory   : %s\n", extractorFactory.getClass());
-        ps.printf("Plugin mime-types: %s\n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() ));
+        ps.printf("Plugin author    : %s%n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
+        ps.printf("Plugin factory   : %s%n", extractorFactory.getClass());
+        ps.printf("Plugin mime-types: %s%n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() ));
     }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/cli/src/main/java/org/apache/any23/cli/Rover.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/Rover.java b/cli/src/main/java/org/apache/any23/cli/Rover.java
index 26a8663..962e10c 100644
--- a/cli/src/main/java/org/apache/any23/cli/Rover.java
+++ b/cli/src/main/java/org/apache/any23/cli/Rover.java
@@ -75,10 +75,10 @@ public class Rover implements Tool {
     private PrintStream outputStream = System.out;
 
     @Parameter(description = "input IRIs {<url>|<file>}+", converter = ArgumentToIRIConverter.class)
-    protected List<String> inputIRIs = new LinkedList<String>();
+    protected List<String> inputIRIs = new LinkedList<>();
 
     @Parameter(names = { "-e", "--extractors" }, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle")
-    private List<String> extractors = new LinkedList<String>();
+    private List<String> extractors = new LinkedList<>();
 
     @Parameter(names = { "-f", "--format" }, description = "the output format")
     private String format = FORMATS.get(DEFAULT_FORMAT_INDEX);
@@ -169,8 +169,10 @@ public class Rover implements Tool {
 
     protected String printReports() {
         final StringBuilder sb = new StringBuilder();
-        if (benchmarkTripleHandler != null) sb.append( benchmarkTripleHandler.report() ).append('\n');
-        if (reportingTripleHandler != null) sb.append( reportingTripleHandler.printReport() ).append('\n');
+        if (benchmarkTripleHandler != null)
+            sb.append( benchmarkTripleHandler.report() ).append('\n');
+        if (reportingTripleHandler != null)
+            sb.append( reportingTripleHandler.printReport() ).append('\n');
         return sb.toString();
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/ToolRunner.java b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
index 90daeb3..b875ec7 100644
--- a/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
+++ b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
@@ -223,7 +223,7 @@ public final class ToolRunner {
             return "z/os";
         } else if (osName.contains("os/400")) {
             return "os/400";
-        } else if (pathSep.equals( ";" )) {
+        } else if (";".equals(pathSep)) {
             return "dos";
         } else if (osName.contains("mac")) {
             if (osName.endsWith("x")) {
@@ -234,7 +234,7 @@ public final class ToolRunner {
             return "tandem";
         } else if (osName.contains("openvms")) {
             return "openvms";
-        } else if (pathSep.equals(":")) {
+        } else if (":".equals(pathSep)) {
             return "unix";
         }
 
@@ -248,7 +248,8 @@ public final class ToolRunner {
                 optionIndex = i;
             }
         }
-        if(optionIndex == -1) return null;
+        if(optionIndex == -1)
+            return null;
 
         if(optionIndex == args.length - 1) {
             throw new IllegalArgumentException("Missing argument for --plugins-dir option.");

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java b/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
index 736e6bb..86dc982 100644
--- a/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
+++ b/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
@@ -53,29 +53,6 @@ public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service
             final DefaultConfiguration conf = DefaultConfiguration.singleton();
             if (instance == null) {
                 instance = new ExtractorRegistryImpl();
-                // FIXME: Remove these hardcoded links to the extractor factories by turning them into SPI interfaces
-                //instance.register(RDFXMLExtractor.factory);
-                //instance.register(TurtleExtractor.factory);
-                //instance.register(NTriplesExtractor.factory);
-                //instance.register(NQuadsExtractor.factory);
-                //instance.register(TriXExtractor.factory);
-                //instance.register(HeadLinkExtractor.factory);
-                //instance.register(LicenseExtractor.factory);
-                //instance.register(TitleExtractor.factory);
-                //instance.register(XFNExtractor.factory);
-                //instance.register(ICBMExtractor.factory);
-                //instance.register(AdrExtractor.factory);
-                //instance.register(GeoExtractor.factory);
-                //instance.register(HCalendarExtractor.factory);
-                //instance.register(HCardExtractor.factory);
-                //instance.register(HListingExtractor.factory);
-                //instance.register(HResumeExtractor.factory);
-                //instance.register(HReviewExtractor.factory);
-                //instance.register(HRecipeExtractor.factory);
-                //instance.register(SpeciesExtractor.factory);
-                //instance.register(TurtleHTMLExtractor.factory);
-                //instance.register(MicrodataExtractor.factory);
-                //instance.register(CSVExtractor.factory);
                 
                 if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
                     instance.unregister(RDFaExtractorFactory.NAME);
@@ -131,8 +108,7 @@ public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service
      */
     @Override
     public ExtractorFactory<?> getFactory(final String name) {
-        ExtractorFactory<?> result = this.get(name).orElseThrow(() -> new IllegalArgumentException("Unregistered extractor name: " + name));
-        return result;
+        return this.get(name).orElseThrow(() -> new IllegalArgumentException("Unregistered extractor name: " + name));
     }
 
     /**
@@ -152,7 +128,7 @@ public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service
      */
     @Override
     public ExtractorGroup getExtractorGroup(List<String> names) {
-        List<ExtractorFactory<?>> members = new ArrayList<ExtractorFactory<?>>(names.size());
+        List<ExtractorFactory<?>> members = new ArrayList<>(names.size());
         for (String name : names) {
             members.add(getFactory(name));
         }
@@ -175,7 +151,7 @@ public class ExtractorRegistryImpl extends org.eclipse.rdf4j.common.lang.service
      */
     @Override
     public List<String> getAllNames() {
-        List<String> result = new ArrayList<String>(this.getKeys());
+        List<String> result = new ArrayList<>(this.getKeys());
         Collections.sort(result);
         return result;
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/core/src/test/java/org/apache/any23/extractor/ExtractionAPITest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/ExtractionAPITest.java b/core/src/test/java/org/apache/any23/extractor/ExtractionAPITest.java
index abef328..3e1dc73 100644
--- a/core/src/test/java/org/apache/any23/extractor/ExtractionAPITest.java
+++ b/core/src/test/java/org/apache/any23/extractor/ExtractionAPITest.java
@@ -17,7 +17,7 @@
 
 package org.apache.any23.extractor;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.any23.extractor.example.ExampleExtractor;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.writer.CountingTripleHandler;
@@ -30,7 +30,7 @@ import org.eclipse.rdf4j.model.IRI;
 public class ExtractionAPITest {
 
     private static final String exampleDoc = "http://example.com/";
-    private static final IRI uri           = RDFUtils.iri(exampleDoc);
+    private static final IRI uri = RDFUtils.iri(exampleDoc);
 
     @Test
     public void testDirectInstantiation() throws Exception {

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java b/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
index e21be18..69d1c41 100644
--- a/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/ExtractionResultImplTest.java
@@ -17,7 +17,7 @@
 
 package org.apache.any23.extractor;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.any23.extractor.html.TitleExtractor;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.writer.TripleHandler;

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/openie/pom.xml
----------------------------------------------------------------------
diff --git a/openie/pom.xml b/openie/pom.xml
index 9745b7a..32d4a0c 100644
--- a/openie/pom.xml
+++ b/openie/pom.xml
@@ -97,16 +97,6 @@
   </dependencies>
 
   <build>
-    <resources>
-      <resource>
-        <directory>${basedir}/../</directory>
-        <targetPath>META-INF</targetPath>
-        <includes>
-          <include>LICENSE.txt</include>
-          <include>NOTICE.txt</include>
-        </includes>
-      </resource>
-    </resources>
     <pluginManagement>
       <plugins>
         <plugin>

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/plugins/basic-crawler/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml
index 9f294ff..c9769fd 100644
--- a/plugins/basic-crawler/pom.xml
+++ b/plugins/basic-crawler/pom.xml
@@ -94,15 +94,6 @@
       <scope>provided</scope>
     </dependency>
 
-    <!-- BEGIN: plugins -->
-    <dependency>
-      <groupId>org.kohsuke.metainf-services</groupId>
-      <artifactId>metainf-services</artifactId>
-      <scope>compile</scope>
-      <optional>true</optional>
-    </dependency>
-    <!-- END: plugins -->
-
     <!-- BEGIN: Test Dependencies -->
     <dependency>
       <groupId>junit</groupId>
@@ -114,16 +105,6 @@
   </dependencies>
 
   <build>
-    <resources>
-      <resource>
-        <directory>${basedir}/../../</directory>
-        <targetPath>META-INF</targetPath>
-        <includes>
-          <include>LICENSE.txt</include>
-          <include>NOTICE.txt</include>
-        </includes>
-      </resource>
-    </resources>
 
     <plugins>
       <plugin>

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/plugins/html-scraper/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/pom.xml b/plugins/html-scraper/pom.xml
index aec0259..108b675 100644
--- a/plugins/html-scraper/pom.xml
+++ b/plugins/html-scraper/pom.xml
@@ -71,15 +71,6 @@
     </dependency>
     <!-- END: BoilerPipe -->
 
-    <!-- BEGIN: plugins -->
-    <dependency>
-      <groupId>org.kohsuke.metainf-services</groupId>
-      <artifactId>metainf-services</artifactId>
-      <scope>compile</scope>
-      <optional>true</optional>
-    </dependency>
-    <!-- END: plugins -->
-    
     <!-- Logging -->
     <dependency>
       <groupId>org.slf4j</groupId>
@@ -104,16 +95,6 @@
   </dependencies>
 
   <build>
-    <resources>
-      <resource>
-        <directory>${basedir}/../../</directory>
-        <targetPath>META-INF</targetPath>
-        <includes>
-          <include>LICENSE.txt</include>
-          <include>NOTICE.txt</include>
-        </includes>
-      </resource>
-    </resources>
 
     <plugins>
       <!-- Generates the distribution package -->

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/plugins/integration-test/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/integration-test/pom.xml b/plugins/integration-test/pom.xml
index d8d87e7..8e8ac9c 100644
--- a/plugins/integration-test/pom.xml
+++ b/plugins/integration-test/pom.xml
@@ -38,6 +38,11 @@
       <version>${project.parent.version}</version>
     </dependency>
     <dependency>
+      <groupId>org.apache.any23</groupId>
+      <artifactId>apache-any23-cli</artifactId>
+      <version>${project.parent.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.apache.any23.plugins</groupId>
       <artifactId>apache-any23-html-scraper</artifactId>
       <version>1.0.7-SNAPSHOT</version>
@@ -69,16 +74,6 @@
   </dependencies>
 
   <build>
-    <resources>
-      <resource>
-        <directory>${basedir}/../../</directory>
-        <targetPath>META-INF</targetPath>
-        <includes>
-          <include>LICENSE.txt</include>
-          <include>NOTICE.txt</include>
-        </includes>
-      </resource>
-    </resources>
 
     <plugins>
       <!-- skip assembly generation for ITs -->
@@ -98,7 +93,6 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-invoker-plugin</artifactId>
-        <version>1.5</version>
         <configuration>
           <projectsDirectory>..</projectsDirectory>
           <cloneProjectsTo>${project.build.directory}/plugins-build</cloneProjectsTo>

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
----------------------------------------------------------------------
diff --git a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
index ce74971..4347c8b 100644
--- a/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
+++ b/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
@@ -17,6 +17,7 @@
 
 package org.apache.any23.plugin;
 
+import org.apache.any23.cli.Crawler;
 import org.apache.any23.cli.Tool;
 import org.apache.any23.extractor.ExtractorGroup;
 import org.apache.any23.extractor.ExtractorRegistryImpl;
@@ -40,8 +41,7 @@ import static org.junit.Assert.assertTrue;
  */
 public class PluginIT {
 
-    //TODO reduced from 31 to 28 within ANY23-276
-    private static final int NUM_OF_EXTRACTORS = 29;
+    private static final int NUM_OF_EXTRACTORS = 32;
 
     private static final String PLUGIN_DIR = "target/plugins-build/";
 
@@ -101,16 +101,14 @@ public class PluginIT {
             tool = tools.next();
             assertTrue("Found duplicate tool.", toolClasses.add(tool.getClass().getName()));
         }
-//TODO Crawler.class not on classpath due to ANY23-276
-//        assertTrue(
-//                String.format(
-//                        "Expected [%s] plugin to be detected, but it is not found in the built classpath.",
-//                        Crawler.class.getName()
-//                ),
-//                toolClasses.contains(Crawler.class.getName())
-//        );
-        //TODO Crawler.class not on classpath due to ANY23-276, should be 7 detected CLI including CrawlerCLI
-        assertEquals(6, toolClasses.size()); // core CLIs
+        assertTrue(
+                String.format(
+                        "Expected [%s] plugin to be detected, but it is not found in the built classpath.",
+                        Crawler.class.getName()
+                ),
+                toolClasses.contains(Crawler.class.getName())
+        );
+        assertEquals(7, toolClasses.size()); // core CLIs
     }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/plugins/office-scraper/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/office-scraper/pom.xml b/plugins/office-scraper/pom.xml
index ebaa599..6d7e7b8 100644
--- a/plugins/office-scraper/pom.xml
+++ b/plugins/office-scraper/pom.xml
@@ -61,15 +61,6 @@
       <scope>test</scope>
     </dependency>
 
-    <!-- BEGIN: plugins -->
-    <dependency>
-      <groupId>org.kohsuke.metainf-services</groupId>
-      <artifactId>metainf-services</artifactId>
-      <scope>compile</scope>
-      <optional>true</optional>
-    </dependency>
-    <!-- END: plugins -->
-
     <!-- BEGIN: Test Dependencies -->
     <dependency>
       <groupId>junit</groupId>
@@ -86,16 +77,6 @@
   </dependencies>
 
   <build>
-    <resources>
-      <resource>
-        <directory>${basedir}/../../</directory>
-        <targetPath>META-INF</targetPath>
-        <includes>
-          <include>LICENSE.txt</include>
-          <include>NOTICE.txt</include>
-        </includes>
-      </resource>
-    </resources>
 
     <plugins>
       <!-- Generates the distribution package -->

http://git-wip-us.apache.org/repos/asf/any23/blob/89d1d85e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index fffc7b5..168232e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -555,8 +555,22 @@
     <repository>
       <id>sonatype-snapshots</id>
       <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
-      <releases><enabled>false</enabled></releases>
-      <snapshots><enabled>true</enabled></snapshots>
+      <releases>
+      	<enabled>false</enabled>
+      </releases>
+      <snapshots>
+      	<enabled>true</enabled>
+      </snapshots>
+    </repository>
+    <repository>
+      <id>apache-snapshots</id>
+      <url>https://repository.apache.org/content/repositories/snapshots/</url>
+      <releases>
+      	<enabled>false</enabled>
+      </releases>
+      <snapshots>
+      	<enabled>true</enabled>
+      </snapshots>
     </repository>
   </repositories>
 


[13/15] any23 git commit: ANY23-304 implement temporary file reader within test logic

Posted by le...@apache.org.
ANY23-304 implement temporary file reader within test logic


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/b39d2201
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/b39d2201
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/b39d2201

Branch: refs/heads/master
Commit: b39d2201440c5f1297e99365744ac3fd9b4f9d90
Parents: 6d5c39e
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Thu Jul 27 12:16:29 2017 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Thu Jul 27 12:16:29 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/any23/openie/OpenIEExtractorTest.java   | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/b39d2201/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
index 9dfad94..9455311 100644
--- a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
+++ b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -16,7 +16,8 @@
  */
 package org.apache.any23.openie;
 
-import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 
 import org.apache.any23.extractor.ExtractionContext;
@@ -66,8 +67,8 @@ public class OpenIEExtractorTest {
     
     public void extract(IRI uri, String filePath) 
       throws IOException, ExtractionException, TripleHandlerException {
-      ByteArrayOutputStream baos = new ByteArrayOutputStream();
-      final TripleHandler tHandler = new RDFXMLWriter(baos);
+      FileOutputStream fos = new FileOutputStream(File.createTempFile("OpenIEExtractorTest", "tmp"));
+      final TripleHandler tHandler = new RDFXMLWriter(fos);
       final ExtractionContext extractionContext = new ExtractionContext("rdf-openie", uri);
       final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
       try {
@@ -78,7 +79,7 @@ public class OpenIEExtractorTest {
                 result
         );
       } finally {
-        logger.debug(baos.toString());
+        logger.debug(fos.toString());
         tHandler.close();
         result.close();
       }


[03/15] any23 git commit: Add META-INF service discovery for openie

Posted by le...@apache.org.
Add META-INF service discovery for openie


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6871755a
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6871755a
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6871755a

Branch: refs/heads/master
Commit: 6871755a4875795b0f48fa2c5cb762d56546c8e6
Parents: 2ecfbff
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Thu Feb 23 17:54:15 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Thu Feb 23 17:54:15 2017 -0800

----------------------------------------------------------------------
 .../META-INF/services/org.apache.any23.extractor.ExtractorFactory   | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/6871755a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
----------------------------------------------------------------------
diff --git a/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
new file mode 100644
index 0000000..48754d9
--- /dev/null
+++ b/openie/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -0,0 +1 @@
+org.apache.any23.openie.OpenIEExtractorFactory
\ No newline at end of file