You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2017/08/23 20:27:06 UTC

[09/15] any23 git commit: ANY23-304 Address comments from ansell

ANY23-304 Address comments from ansell


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/1b0c5ff2
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/1b0c5ff2
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/1b0c5ff2

Branch: refs/heads/master
Commit: 1b0c5ff22bb61a9cd992b909c776592a081216e4
Parents: 89d1d85
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Mar 1 17:54:38 2017 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Mar 1 17:54:38 2017 -0800

----------------------------------------------------------------------
 cli/pom.xml                                     | 14 +++---
 .../org/apache/any23/cli/ToolRunnerTest.java    | 15 ++++--
 .../any23/extractor/openie/OpenIEExtractor.java |  5 +-
 .../any23/openie/OpenIEExtractorTest.java       |  1 -
 plugins/basic-crawler/pom.xml                   | 34 +++++++++++++
 src/site/apt/any23-plugins.apt                  | 16 +++----
 src/site/apt/configuration.apt                  | 10 ++--
 src/site/apt/dev-csv-extractor.apt              |  2 +-
 src/site/apt/dev-data-conversion.apt            | 20 ++++----
 src/site/apt/dev-data-extraction.apt            | 20 ++++----
 src/site/apt/dev-microformat-extractors.apt     | 12 ++---
 src/site/apt/dev-validation-fix.apt             | 12 ++---
 src/site/apt/dev-xpath-extractor.apt            |  2 +-
 src/site/apt/extractors.apt                     | 50 ++++++++++----------
 src/site/apt/getting-started.apt                |  2 +-
 src/site/apt/plugin-basic-crawler.apt           |  4 +-
 src/site/apt/plugin-office-scraper.apt          |  2 +-
 17 files changed, 131 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/cli/pom.xml
----------------------------------------------------------------------
diff --git a/cli/pom.xml b/cli/pom.xml
index 3f183ae..79e8cab 100644
--- a/cli/pom.xml
+++ b/cli/pom.xml
@@ -74,22 +74,20 @@
       <groupId>${project.groupId}</groupId>
       <artifactId>apache-any23-openie</artifactId>
       <version>${project.version}</version>
-    </dependency>
-    <!-- dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>apache-any23-basic-crawler</artifactId>
-      <version>${project.version}</version>
+      <scope>runtime</scope>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
+      <groupId>${project.groupId}.plugins</groupId>
       <artifactId>apache-any23-office-scraper</artifactId>
       <version>${project.version}</version>
+      <scope>runtime</scope>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
+      <groupId>${project.groupId}.plugins</groupId>
       <artifactId>apache-any23-html-scraper</artifactId>
       <version>${project.version}</version>
-    </dependency-->
+      <scope>runtime</scope>
+    </dependency>
     <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
index 881a782..11484bb 100644
--- a/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
@@ -17,7 +17,7 @@
 
 package org.apache.any23.cli;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.IOException;
@@ -34,7 +34,13 @@ import static org.junit.Assert.assertTrue;
  */
 public class ToolRunnerTest {
 
-    private final Set<Class<? extends Tool>> coreTools = new HashSet<Class<? extends Tool>>(){{
+    private final Set<Class<? extends Tool>> coreTools = new HashSet<Class<? extends Tool>>(){
+        /**
+         * 
+         */
+        private static final long serialVersionUID = 1L;
+
+    {
         add(ExtractorDocumentation.class);
         add(MicrodataParser.class);
         add(MimeDetector.class);
@@ -48,7 +54,10 @@ public class ToolRunnerTest {
         Iterator<Tool> tools = new ToolRunner().getToolsInClasspath();
         assertTrue("No core tools have been detected", tools.hasNext());
         while (tools.hasNext()) {
-            assertTrue("Some core tools have not been detected.", coreTools.contains(tools.next().getClass()));
+            assertTrue("Discrepancy between expected and detected tools on classpath. "
+                    + "Expected ExtractorDocumentation.class,"
+                    + "MicrodataParser.class, MimeDetector.class, PluginVerifier.class"
+                    + "Rover.class and VocabPrinter.class.", coreTools.contains(tools.next().getClass()));
         }
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
index bef40de..812ed9c 100644
--- a/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
+++ b/openie/src/main/java/org/apache/any23/extractor/openie/OpenIEExtractor.java
@@ -113,9 +113,10 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
         // instance.extr().arg1().text() - subject
         // instance.extr().rel().text() - predicate
         // instance.extr().arg2s().text() - object
+        final Configuration immutableConf = DefaultConfiguration.singleton();
+        Double threshold = Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"));
         for(Instance instance : listExtractions) {
-            final Configuration immutableConf = DefaultConfiguration.singleton();
-            if (instance.confidence() > Double.parseDouble(immutableConf.getProperty("any23.extraction.openie.confidence.threshold", "0.5"))) {
+            if (instance.confidence() > threshold) {
                 List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
                 for(Argument argument : listArg2s) {
                     Resource subject = RDFUtils.makeIRI(instance.extr().arg1().text(), documentIRI);

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
----------------------------------------------------------------------
diff --git a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
index 0ba03fd..9dfad94 100644
--- a/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
+++ b/openie/src/test/java/org/apache/any23/openie/OpenIEExtractorTest.java
@@ -57,7 +57,6 @@ public class OpenIEExtractorTest {
         extractor = null;
     }
 
-    //@Ignore("This typically results in a JVM crash... disabled for the time being.")
     @Test
     public void testExtractFromHTMLDocument() 
       throws IOException, ExtractionException, TripleHandlerException {

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/plugins/basic-crawler/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml
index c9769fd..4fdf257 100644
--- a/plugins/basic-crawler/pom.xml
+++ b/plugins/basic-crawler/pom.xml
@@ -139,6 +139,40 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.0.0</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <manifestEntries>
+                    <Main-Class>org.apache.any23.cli.Crawler</Main-Class>
+                    <Build-Number>${implementation.build}</Build-Number>
+                  </manifestEntries>
+                </transformer>
+              </transformers>
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*.SF</exclude>
+                    <exclude>META-INF/*.DSA</exclude>
+                    <exclude>META-INF/*.RSA</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <finalName>${project.artifactId}-uber-${project.version}</finalName>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/any23-plugins.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/any23-plugins.apt b/src/site/apt/any23-plugins.apt
index 7bd297b..f429e2d 100644
--- a/src/site/apt/any23-plugins.apt
+++ b/src/site/apt/any23-plugins.apt
@@ -31,9 +31,9 @@ Apache Any23 Plugins
 
     A plugin is a standard <Maven3> module containing any implementation of
 
-    * {{{./xref/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}}
+    * {{{./apidocs/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}}
 
-    * {{{./xref/org/apache/any23/cli/Tool.html}Tool}}
+    * {{{./apidocs/org/apache/any23/cli/Tool.html}Tool}}
 
 * How to Register a Plugin
 
@@ -50,13 +50,13 @@ export CLASSPATH_PREFIX=../../../plugins/basic-crawler/target/any23-basic-crawle
    * adding its <JAR> to the <$HOME/.any23/plugins> directory.
 
    A plugin can be added to the <Apache Any23 library API> by using the
-   {{{./xref/org/apache/any23/plugin/Any23PluginManager.html}Any23PluginManager}}#createInstance(Configuration configuration, File... pluginLocations)
+   {{{./apidocs/org/apache/any23/plugin/Any23PluginManager.html}Any23PluginManager}}#createInstance(Configuration configuration, File... pluginLocations)
    method.
 
    TODO: plugin support in Apache Any23 Service
 
     Any implementation of <ExtractorPlugin> will automatically be registered to the
-    {{{./xref/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
+    {{{./apidocs/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
 
     Any detected implementation of <Tool> will be listed by the <ToolRunner>
     command-line tool in <any23-root/><<bin/any23>> .
@@ -74,7 +74,7 @@ export CLASSPATH_PREFIX=../../../plugins/basic-crawler/target/any23-basic-crawle
 
    An <Extractor Plugin> is a class:
 
-   * implementing the {{{./xref/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}} interface;
+   * implementing the {{{./apidocs/org/apache/any23/plugin/ExtractorPlugin.html}ExtractorPlugin}} interface;
 
    * packaged under <<org.apache.any23.plugin>> .
 
@@ -107,7 +107,7 @@ public class HTMLScraperPlugin implements ExtractorPlugin {
 
    A <Tool Plugin> is a Java class that:
 
-   * implementing the {{{./xref/org/apache/any23/cli/Tool.html}Tool}} interface;
+   * implementing the {{{./apidocs/org/apache/any23/cli/Tool.html}Tool}} interface;
 
    * CLI parameters are extracted by annotating the class members with {{{http://jcommander.org/}JCommander}} annotations.
 
@@ -152,7 +152,7 @@ public class MyExecutableTool implements Tool {
 
   * Crawler CLI Tool
 
-    The {{{./xref/org/apache/any23/cli/Crawler.html}Crawler CLI Tool}} is an extension of the
-    {{{./xref/org/apache/any23/cli/Rover.html}Rover CLI Tool}} to add site crawling basic
+    The {{{./apidocs/org/apache/any23/cli/Crawler.html}Crawler CLI Tool}} is an extension of the
+    {{{./apidocs/org/apache/any23/cli/Rover.html}Rover CLI Tool}} to add site crawling basic
     capabilities. More information about the <CLI> can be found at
     {{{./getting-started.html#crawler-tool}Getting Started - Crawler Tool}} section.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/configuration.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/configuration.apt b/src/site/apt/configuration.apt
index 63ff1fd..076f806 100644
--- a/src/site/apt/configuration.apt
+++ b/src/site/apt/configuration.apt
@@ -27,8 +27,8 @@ Configuration
     The core module contains the main library code and the command-line implementation.
 
     The main library configuration parameters are managed by the
-    {{{./xref/org/apache/any23/configuration/DefaultConfiguration.html} Configuration}}
-    class. The default values are declared within the {{{http://any23.googlecode.com/svn/trunk/any23-core/src/main/resources/default-configuration.properties} default-configuration.properties}}
+    {{{./apidocs/org/apache/any23/configuration/DefaultConfiguration.html} Configuration}}
+    class. The default values are declared within the {{{https://github.com/apache/any23/blob/master/api/src/main/resources/default-configuration.properties} default-configuration.properties}}
     file. The following sections explain how to override the default configuration.
 
 ** Override Default Configuration from Command-line
@@ -52,7 +52,7 @@ any23-core/bin/$ ANY23_OPTS="-Dany23.http.client.max.connections=10" any23 http:
 
 ** Override Default Configuration Programmatically
 
-    The {{{./xref/org/apache/any23/configuration/Configuration.html} Configuration}}
+    The {{{./apidocs/org/apache/any23/configuration/Configuration.html} Configuration}}
     properties can be accessed in read-only mode by retrieving the configuration <<singleton>> instance.\
     Such instance is <immutable>:
 
@@ -62,7 +62,7 @@ final String propertyValue = immutableConf.getProperty("propertyName", "default
 ...
 +----------------------------------------------------------------------------------------------
 
-    To obtain a <modifiable> {{{./xref/org/apache/any23/configuration/Configuration.html} Configuration}}
+    To obtain a <modifiable> {{{./apidocs/org/apache/any23/configuration/Configuration.html} Configuration}}
     instead it is possible to use the <<copy()>> method.\
     One of the <<Apache Any23>> constructors accepts a <<Configuration>> object that allows to customize the behavior
     of the <<Apache Any23>> instance for its entire life-cycle.
@@ -77,7 +77,7 @@ final Apache Any23 any23 = new Apache Any23(modifiableConf, "extractor1", ...);
 * Use of ExtractionParameters
 
     It is possible to customize the behavior of a single data extraction by providing an
-    {{{./xref/org/apache/any23/extractor/ExtractionParameters.html} ExtractionParameters}}
+    {{{./apidocs/org/apache/any23/extractor/ExtractionParameters.html} ExtractionParameters}}
     instance to one of the <Apache Any23#extract()> methods accepting it. <<ExtractionParameters>> allows to customize any <property> and <flag>
     other than the <<specific extraction options>>.\
     If no custom parameters are specified the default configuration values are used.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-csv-extractor.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-csv-extractor.apt b/src/site/apt/dev-csv-extractor.apt
index 7eb7c8a..24b0d6c 100644
--- a/src/site/apt/dev-csv-extractor.apt
+++ b/src/site/apt/dev-csv-extractor.apt
@@ -22,7 +22,7 @@
 
 CSV Extractor Algorithm
 
-  The {{{./xref/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} produces 
+  The {{{./apidocs/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} produces 
   an RDF representation of a CSV file compliant with the {{{http://www.ietf.org/rfc/rfc4180.txt}RFC 4180}} 
   and that foresees a header.
   Such extractor relies on the presence of a header to use the named fields as RDF properties.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-data-conversion.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-data-conversion.apt b/src/site/apt/dev-data-conversion.apt
index 319eadd..1749b9a 100644
--- a/src/site/apt/dev-data-conversion.apt
+++ b/src/site/apt/dev-data-conversion.apt
@@ -47,35 +47,35 @@ Data Conversion
  useful for the transformation. The facade constructor accepts a list of extractor names, if specified
  the extraction will be done only over this list, otherwise the data <MIME Type> will be detected and will be applied
  all the compatible extractors declared within the
- {{{./xref/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
+ {{{./apidocs/org/apache/any23/extractor/ExtractorRegistry.html}ExtractorRegistry}}.
 
  The <<line 2>> defines the input string containing some {{{http://www.w3.org/TeamSubmission/turtle/}Turtle}} data.
 
- At <<line 3>> we instantiate a {{{./xref/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
+ At <<line 3>> we instantiate a {{{./apidocs/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
   specifying the content and the source <IRI>.
  The <IRI> should be the source of the content data, and must be valid.
- Besides the {{{./xref/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
+ Besides the {{{./apidocs/org/apache/any23/source/StringDocumentSource.html}StringDocumentSource}},
  you can also provide input from other sources, such as <HTTP> requests
- and local files. See the classes in the sources {{{./xref/org/apache/any23/source/package-summary.html}package}}.
+ and local files. See the classes in the sources {{{./apidocs/org/apache/any23/source/package-summary.html}package}}.
 
  The <<line 4>> defines a buffered output stream that will be used to store the data produced by the
  writer declared at <<line 5>>.
 
  A writer stores the extracted triples in some destination.
- We use an {{{./xref/org/apache/any23/writer/NTriplesWriter.html}NTriplesWriter}} here that writes
+ We use an {{{./apidocs/org/apache/any23/writer/NTriplesWriter.html}NTriplesWriter}} here that writes
  into a <<ByteArrayOutputStream>>. The main <<RDF>> formats writers are available and it is possible also to store
  the triples directly into an <<RDF4J>> repository to query them via <<SPARQL>>.
- See {{{./xref/org/apache/any23/writer/RepositoryWriter.html}RepositoryWriter}} and the writer
- {{{./xref/org/apache/any23/writer/package-summary.html}package}}.
+ See {{{./apidocs/org/apache/any23/writer/RepositoryWriter.html}RepositoryWriter}} and the writer
+ {{{./apidocs/org/apache/any23/writer/package-summary.html}package}}.
 
  The extractor method invoked at <<line 6>> performs the metadata extraction.
- This method accepts as first argument a {{{./xref/org/apache/any23/source/DocumentSource.html}DocumentSource}} and as
- second argument a {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}},
+ This method accepts as first argument a {{{./apidocs/org/apache/any23/source/DocumentSource.html}DocumentSource}} and as
+ second argument a {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}},
  that will receive the sequence parsing events generated by the applied extractors. The extract method defines also
  another signature where it is possible to specify a charset encoding for the input data. If <<null>>, the charset
  will be auto detected.
 
- The {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
+ The {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
  this is done safely in a <<finally>> block at <<line 7>>.
 
  The expected output is <UTF-8> encoded at <<line 8>>:

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-data-extraction.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-data-extraction.apt b/src/site/apt/dev-data-extraction.apt
index 2a5bda2..1f67a53 100644
--- a/src/site/apt/dev-data-extraction.apt
+++ b/src/site/apt/dev-data-extraction.apt
@@ -45,21 +45,21 @@ Data Extraction
    the usage of specific extractors.
 
    The <<line 2>> defines the <HTTP User Agent>, used to identify the client during <HTTP> data collection.
-   At <<line 3>> we use the runner to create an instance of {{{./xref/org/apache/any23/http/HTTPClient.html}HTTPClient}},
-   used by {{{./xref/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} for <HTTP> content fetching.
+   At <<line 3>> we use the runner to create an instance of {{{./apidocs/org/apache/any23/http/HTTPClient.html}HTTPClient}},
+   used by {{{./apidocs/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} for <HTTP> content fetching.
 
-   The <<line 4>> instantiates an {{{./xref/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} instance,
-   specifying the {{{./xref/org/apache/any23/http/HTTPClient.html}HTTPClient}} and the URL addressing the content
+   The <<line 4>> instantiates an {{{./apidocs/org/apache/any23/source/HTTPDocumentSource.html}HTTPDocumentSource}} instance,
+   specifying the {{{./apidocs/org/apache/any23/http/HTTPClient.html}HTTPClient}} and the URL addressing the content
    to be processed.
 
    At <<line 5>> we define a buffered output stream used to store data produced by the
-   {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} defined at <<line 6>>.
+   {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} defined at <<line 6>>.
 
    The extraction method at <<line 7>> will run the metadata extraction.
    The produced metadata will be written within the passed
-   {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} instance.
+   {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} instance.
 
-   The {{{./xref/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
+   The {{{./apidocs/org/apache/any23/writer/TripleHandler.html}TripleHandler}} needs to be explicitly closed,
    this is done safely in a <<finally>> block at <<line 8>>.
 
    The expected output is <UTF-8> encoded at <<line 9>> and is:
@@ -96,11 +96,11 @@ Filter Out Accidental Triples
    To remove accidental triples <<Apache Any23>> provides a set of useful filters, located
    within the <<org.apache.any23.filter>> package.
 
-   The filter {{{./xref/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.html}IgnoreTitlesOfEmptyDocuments}}
-   removes triples generated by the {{{./xref/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
+   The filter {{{./apidocs/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.html}IgnoreTitlesOfEmptyDocuments}}
+   removes triples generated by the {{{./apidocs/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
    if the document is empty.
 
-   The filter {{{./xref/org/apache/any23/filter/IgnoreAccidentalRDFa.html}IgnoreAccidentalRDFa}} removes accidental
+   The filter {{{./apidocs/org/apache/any23/filter/IgnoreAccidentalRDFa.html}IgnoreAccidentalRDFa}} removes accidental
    <<CSS>> related triples.
 
 +------------------------------------

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-microformat-extractors.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-microformat-extractors.apt b/src/site/apt/dev-microformat-extractors.apt
index e5db96e..4f03d71 100644
--- a/src/site/apt/dev-microformat-extractors.apt
+++ b/src/site/apt/dev-microformat-extractors.apt
@@ -40,7 +40,7 @@ Microformat Extractors
  More specifically:
 
   * Embedding explicitly the logic within the
-  {{{./xref/org/apache/any23/extractor/html/package-summary.html}Microformats Extractors}}
+  {{{./apidocs/org/apache/any23/extractor/html/package-summary.html}Microformats Extractors}}
 
   * Using the default <<Apache Any23>> nesting feature.
 
@@ -60,7 +60,7 @@ Microformat Extractors
 </span>
 +----------------------------------------------------------------------------------------------
 
- Since, as shown below, the {{{./xref/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}}
+ Since, as shown below, the {{{./apidocs/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}}
  contains the code to handle nested hAddress,
 
 +------------------------------
@@ -101,12 +101,12 @@ private boolean addSubMicroformat(String className, Resource resource, IRI prope
 +-----------------------------------------------------------------------------------------------------
 
  It is highly recommended to decorate the extractors that natively handle the nesting relationship using the
-  {{{./xref/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation. This annotation,
+  {{{./apidocs/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation. This annotation,
   if present, avoids the production of <nesting_original> and <nesting_structured> RDF statements.
 
-  The following example shows how the {{{./xref/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation
-  could be used to claim the fact that {{{./xref/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}} natively
-  embedds the {{{./xref/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}.
+  The following example shows how the {{{./apidocs/org/apache/any23/extractor/html/annotations/Includes.html}@Includes}} annotation
+  could be used to claim the fact that {{{./apidocs/org/apache/any23/extractor/html/HCardExtractor.html}HCardExtractor}} natively
+  embedds the {{{./apidocs/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}.
 
 +----------------------------------------------------------------------------------------------
 @Includes( extractors = AdrExtractor.class )

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-validation-fix.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-validation-fix.apt b/src/site/apt/dev-validation-fix.apt
index 96a9bf2..c588fc2 100644
--- a/src/site/apt/dev-validation-fix.apt
+++ b/src/site/apt/dev-validation-fix.apt
@@ -35,9 +35,9 @@ Validation and Fixing
    This page describes the <<Apache Any23>> rule-based approach, that allows it to detect, fix and correctly extract
    RDF from those ill-formed RDFa in XHTML pages.
 
-   More specifically, <<Apache Any23>> allows you to write a {{{./xref/org/apache/any23/validator/Rule.html}Rule}}
-   able to detect the errors, a {{{./xref/org/apache/any23/validator/Fix.html}Fix}} containing the logic to fix the problem and a
-   {{{./xref/org/apache/any23/validator/Validator.html}Validator}} which acts as a register of rules and fixes. The Validator
+   More specifically, <<Apache Any23>> allows you to write a {{{./apidocs/org/apache/any23/validator/Rule.html}Rule}}
+   able to detect the errors, a {{{./apidocs/org/apache/any23/validator/Fix.html}Fix}} containing the logic to fix the problem and a
+   {{{./apidocs/org/apache/any23/validator/Validator.html}Validator}} which acts as a register of rules and fixes. The Validator
    calls all the registered rules and when one of them is applied it calls the associated Fix.
 
    The following code snippet shows how to programmatically detect and fix a very common data error with <<Apache Any23>>.
@@ -64,8 +64,8 @@ Validation and Fixing
 </div>
 +------------------------------------------------------------------------------------------
 
-   With the <<Apache Any23>> {{{./xref/org/apache/any23/validator/package-summary.html}Validator}} classes it's possible to solve this
-   problem simply implementing the {{{./xref/org/apache/any23/validator/Rule.html}Rule}} interface as described below:
+   With the <<Apache Any23>> {{{./apidocs/org/apache/any23/validator/package-summary.html}Validator}} classes it's possible to solve this
+   problem simply implementing the {{{./apidocs/org/apache/any23/validator/Rule.html}Rule}} interface as described below:
 
 +------------------------------------------------------------------------------------------
 public class MissingOpenGraphNamespaceRule implements Rule {
@@ -100,7 +100,7 @@ public class MissingOpenGraphNamespaceRule implements Rule {
 }
 +------------------------------------------------------------------------------------------
 
-   The {{{./xref/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.html}MissingOpenGraphNamespaceRule}} inspects the DOM
+   The {{{./apidocs/org/apache/any23/validator/rule/MissingOpenGraphNamespaceRule.html}MissingOpenGraphNamespaceRule}} inspects the DOM
    structure of the HTML page and if it finds some META tags with some RDFa property (of the OpenGraph Protocol vocabulary, in this case)
    it looks for the declaration of that namespace. If there is no declaration it returns <<true>>, which means that an error has been detected
    within the document.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/dev-xpath-extractor.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/dev-xpath-extractor.apt b/src/site/apt/dev-xpath-extractor.apt
index 4bae9c2..fc9df63 100644
--- a/src/site/apt/dev-xpath-extractor.apt
+++ b/src/site/apt/dev-xpath-extractor.apt
@@ -28,4 +28,4 @@ XPath Extractor
     activated by a regular expression over the page URL.
     When an extraction rule is activated all the variables it defines are
     evaluated and then a NQuads template is expanded for generating statements.
-    See {{{./xref/org/apache/any23/extractor/xpath/package-summary.html}Javadoc}}.
+    See {{{./apidocs/org/apache/any23/extractor/xpath/package-summary.html}Javadoc}}.

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/extractors.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/extractors.apt b/src/site/apt/extractors.apt
index ddce55b..4031b05 100644
--- a/src/site/apt/extractors.apt
+++ b/src/site/apt/extractors.apt
@@ -22,7 +22,7 @@
 
 Apache Any23 Extractors
 
-  This page enlists all the Apache Any23 Extractors (see source code {{{./xref/org/apache/any23/extractor/package-summary.html}package}}).
+  This page enlists all the Apache Any23 Extractors (see source code {{{./apidocs/org/apache/any23/extractor/package-summary.html}package}}).
 
 * Microformat Extractors
 
@@ -31,68 +31,68 @@ Apache Any23 Extractors
       Specific details about *Microformats* extractors can be found {{{./dev-microformat-extractors.html}here}}.
       In particular the *Microformats Nesting* representation policy is described {{{./dev-microformat-extractors.html#microformat-nesting}here}}.
 
-      {{{./xref/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/AdrExtractor.html}AdrExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/GeoExtractor.html}GeoExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/GeoExtractor.html}GeoExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HCalendarExtractor.html}HCalendar}}
+      {{{./apidocs/org/apache/any23/extractor/html/HCalendarExtractor.html}HCalendar}}
 
-      {{{./xref/org/apache/any23/extractor/html/HCardExtractor.html}HCard}}
+      {{{./apidocs/org/apache/any23/extractor/html/HCardExtractor.html}HCard}}
 
-      {{{./xref/org/apache/any23/extractor/html/HListingExtractor.html}HListing}}
+      {{{./apidocs/org/apache/any23/extractor/html/HListingExtractor.html}HListing}}
 
-      {{{./xref/org/apache/any23/extractor/html/HResumeExtractor.html}HResume}}
+      {{{./apidocs/org/apache/any23/extractor/html/HResumeExtractor.html}HResume}}
 
-      {{{./xref/org/apache/any23/extractor/html/HReviewExtractor.html}HReview}}
+      {{{./apidocs/org/apache/any23/extractor/html/HReviewExtractor.html}HReview}}
 
-      {{{./xref/org/apache/any23/extractor/html/SpeciesExtractor.html}SpeciesExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/SpeciesExtractor.html}SpeciesExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/LicenseExtractor.html}LicenseExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/LicenseExtractor.html}LicenseExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/XFNExtractor.html}XFNExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/XFNExtractor.html}XFNExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HRecipeExtractor.html}HRecipeExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/HRecipeExtractor.html}HRecipeExtractor}}
 
 *  RDFa [1.0 , 1.1]
 
       The following extractors refer to the {{{http://www.w3.org/TR/rdfa-syntax/}RDFa 1.0}}
       and {{{http://www.w3.org/TR/rdfa-core/}RDFa 1.1}} specifications.
 
-      {{{./xref/org/apache/any23/extractor/rdfa/RDFaExtractor.html}RDFaExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdfa/RDFaExtractor.html}RDFaExtractor}}
 
 * Microdata
 
       The following extractors refer to the {{{http://dev.w3.org/html5/md/}Microdata specifications}}.
 
-      {{{./xref/org/apache/any23/extractor/microdata/MicrodataExtractor.html}MicrodataExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/microdata/MicrodataExtractor.html}MicrodataExtractor}}
 
 *  RDF
 
-      {{{./xref/org/apache/any23/extractor/rdf/RDFXMLExtractor.html}RDFXMLExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/RDFXMLExtractor.html}RDFXMLExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/rdf/NQuadsExtractor.html}NQuadsExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/NQuadsExtractor.html}NQuadsExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/rdf/TurtleExtractor.html}TurtleExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/TurtleExtractor.html}TurtleExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/rdf/NTriplesExtractor.html}NTriplesExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/rdf/NTriplesExtractor.html}NTriplesExtractor}}
 
 * Metadata Extractors
 
-      {{{./xref/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/TitleExtractor.html}TitleExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HTMLMetaExtractor.html}HTMLMetaExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/HTMLMetaExtractor.html}HTMLMetaExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/HeadLinkExtractor.html}HeadLinkExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/HeadLinkExtractor.html}HeadLinkExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/ICBMExtractor.html}ICBMExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/ICBMExtractor.html}ICBMExtractor}}
 
-      {{{./xref/org/apache/any23/extractor/html/TurtleHTMLExtractor.html}TurtleHTMLExtractor}}
+      {{{./apidocs/org/apache/any23/extractor/html/TurtleHTMLExtractor.html}TurtleHTMLExtractor}}
 
 * Content Extractors
 
-      {{{./xref/org/apache/any23/extractor/xpath/XPathExtractor.html}XPath Extractor}} (<<Experimental>>)
+      {{{./apidocs/org/apache/any23/extractor/xpath/XPathExtractor.html}XPath Extractor}} (<<Experimental>>)
 
-      {{{./xref/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} (See the extraction {{{./dev-csv-extractor.html}algorithm}}.)
+      {{{./apidocs/org/apache/any23/extractor/csv/CSVExtractor.html}CSV Extractor}} (See the extraction {{{./dev-csv-extractor.html}algorithm}}.)
 
 Get more documentation
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/getting-started.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/getting-started.apt b/src/site/apt/getting-started.apt
index 5f60b93..75861da 100644
--- a/src/site/apt/getting-started.apt
+++ b/src/site/apt/getting-started.apt
@@ -334,7 +334,7 @@ any23-service$ ./bin/any23server
     from the command line in order to start up the server, then go to {{{http://localhost:8080/}}}
     to access the web interface. A live demo version of such service is running at {{{http://any23.org/}}}.
     You can also start the server from Java by running the
-    {{{./xref/org/apache/any23/servlet/Servlet.html}Apache Any23 Servlet}} class. Maven can be used to create a WAR
+    {{{./apidocs/org/apache/any23/servlet/Servlet.html}Apache Any23 Servlet}} class. Maven can be used to create a WAR
     file for deployment into an existing servlet container such as {{{http://tomcat.apache.org/}Apache Tomcat}}.
 
 * Use <<Apache Any23>> as a Library

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/plugin-basic-crawler.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/plugin-basic-crawler.apt b/src/site/apt/plugin-basic-crawler.apt
index 94828da..dcbcee9 100644
--- a/src/site/apt/plugin-basic-crawler.apt
+++ b/src/site/apt/plugin-basic-crawler.apt
@@ -22,8 +22,8 @@
 
 Basic Crawler Plugin
 
-  The <Basic Crawler Plugin> implements a <CLI> {{{./xref/org/apache/any23/cli/Tool.html}Tool}} extending
-  {{{./xref/org/apache/any23/cli/Rover.html}Rover}} to add <site crawling> capabilities.
+  The <Basic Crawler Plugin> implements a <CLI> {{{./apidocs/org/apache/any23/cli/Tool.html}Tool}} extending
+  {{{./apidocs/org/apache/any23/cli/Rover.html}Rover}} to add <site crawling> capabilities.
 
   The tool can be used to extract semantic content from small/medium size sites.
 

http://git-wip-us.apache.org/repos/asf/any23/blob/1b0c5ff2/src/site/apt/plugin-office-scraper.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/plugin-office-scraper.apt b/src/site/apt/plugin-office-scraper.apt
index 3508f95..fcdff24 100644
--- a/src/site/apt/plugin-office-scraper.apt
+++ b/src/site/apt/plugin-office-scraper.apt
@@ -24,7 +24,7 @@ Office Scraper Plugins
 
  * <Excel Plugin>
 
-   The {{{./xref/org/apache/any23/plugin/officescraper/ExcelPlugin.html}ExcelPlugin}} converts any
+   The {{{./apidocs/org/apache/any23/plugin/officescraper/ExcelPlugin.html}ExcelPlugin}} converts any
    <<Microsoft Excel>> <97-2007> document to <RDF>.
 
    <<TODO: add conversion schema.>>