You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2017/01/13 22:25:59 UTC

[20/25] any23 git commit: ANY23-80 : Split out CLI into its own module

ANY23-80 : Split out CLI into its own module

Signed-off-by: Peter Ansell <p_...@yahoo.com>


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/242b130b
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/242b130b
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/242b130b

Branch: refs/heads/master
Commit: 242b130b4670507e240bf9fec1fb8f9aad647870
Parents: 82e5645
Author: Peter Ansell <p_...@yahoo.com>
Authored: Thu Jan 12 10:35:17 2017 +1100
Committer: Peter Ansell <p_...@yahoo.com>
Committed: Thu Jan 12 10:35:17 2017 +1100

----------------------------------------------------------------------
 cli/pom.xml                                     | 253 ++++++++++++++++++
 .../any23/cli/ExtractorDocumentation.java       | 186 +++++++++++++
 .../org/apache/any23/cli/MicrodataParser.java   |  99 +++++++
 .../java/org/apache/any23/cli/MimeDetector.java | 101 +++++++
 .../org/apache/any23/cli/PluginVerifier.java    |  86 ++++++
 .../main/java/org/apache/any23/cli/Rover.java   | 265 +++++++++++++++++++
 .../java/org/apache/any23/cli/ToolRunner.java   | 263 ++++++++++++++++++
 .../java/org/apache/any23/cli/VocabPrinter.java |  54 ++++
 .../java/org/apache/any23/cli/package-info.java |  22 ++
 .../any23/cli/ExtractorDocumentationTest.java   |  57 ++++
 .../apache/any23/cli/MicrodataParserTest.java   |  46 ++++
 .../org/apache/any23/cli/MimeDetectorTest.java  |  51 ++++
 .../apache/any23/cli/PluginVerifierTest.java    |  38 +++
 .../java/org/apache/any23/cli/RoverTest.java    | 139 ++++++++++
 .../org/apache/any23/cli/ToolRunnerTest.java    |  65 +++++
 .../java/org/apache/any23/cli/ToolTestBase.java |  91 +++++++
 .../org/apache/any23/cli/VocabPrinterTest.java  |  38 +++
 .../any23/cli/ExtractorDocumentation.java       | 186 -------------
 .../org/apache/any23/cli/MicrodataParser.java   |  99 -------
 .../java/org/apache/any23/cli/MimeDetector.java | 101 -------
 .../org/apache/any23/cli/PluginVerifier.java    |  86 ------
 .../main/java/org/apache/any23/cli/Rover.java   | 265 -------------------
 .../java/org/apache/any23/cli/ToolRunner.java   | 263 ------------------
 .../java/org/apache/any23/cli/VocabPrinter.java |  54 ----
 .../java/org/apache/any23/cli/package-info.java |  22 --
 .../any23/cli/ExtractorDocumentationTest.java   |  57 ----
 .../apache/any23/cli/MicrodataParserTest.java   |  46 ----
 .../org/apache/any23/cli/MimeDetectorTest.java  |  51 ----
 .../apache/any23/cli/PluginVerifierTest.java    |  38 ---
 .../java/org/apache/any23/cli/RoverTest.java    | 139 ----------
 .../org/apache/any23/cli/ToolRunnerTest.java    |  65 -----
 .../java/org/apache/any23/cli/ToolTestBase.java |  91 -------
 .../org/apache/any23/cli/VocabPrinterTest.java  |  38 ---
 plugins/basic-crawler/pom.xml                   |  16 +-
 plugins/html-scraper/pom.xml                    |   1 -
 plugins/office-scraper/pom.xml                  |   1 -
 pom.xml                                         |   1 +
 37 files changed, 1870 insertions(+), 1604 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/pom.xml
----------------------------------------------------------------------
diff --git a/cli/pom.xml b/cli/pom.xml
new file mode 100644
index 0000000..c01f3b7
--- /dev/null
+++ b/cli/pom.xml
@@ -0,0 +1,253 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.any23</groupId>
+    <artifactId>apache-any23</artifactId>
+    <version>2.0-SNAPSHOT</version>
+    <relativePath>../</relativePath>
+  </parent>
+
+  <artifactId>apache-any23-cli</artifactId>
+
+  <name>Apache Any23 :: CLI</name>
+  <description>Command line interface.</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-api</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-csvutils</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-mime</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-encoding</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>apache-any23-test-resources</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>net.sourceforge.nekohtml</groupId>
+      <artifactId>nekohtml</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.beust</groupId>
+      <artifactId>jcommander</artifactId>
+    </dependency>
+
+    <!-- BEGIN: Tika -->
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+    </dependency>
+    <!-- END: Tika -->
+
+    <!-- BEGIN: RDF4J (formerly Sesame) -->
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-model</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-rio-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-rio-jsonld</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-rio-turtle</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-rio-rdfxml</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-rio-ntriples</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-rio-trix</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-repository-sail</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-sail-memory</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.rdf4j</groupId>
+      <artifactId>rdf4j-repository-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.semarglproject</groupId>
+      <artifactId>semargl-rdf4j</artifactId>
+    </dependency>
+    <!-- END: RDF4J (formerly Sesame) -->
+    
+    <!-- BEGIN: Apache Commons CSV, this version is hosted in the
+           any23-repository-external repository -->
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-csv</artifactId>
+    </dependency>
+    <!-- END: Apache Commons CSV -->
+
+    <!-- BEGIN: Test Dependencies -->
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+    </dependency>
+    <!-- END: Test Dependencies -->
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>${basedir}/src/main/resources</directory>
+        <filtering>true</filtering>
+      </resource>
+
+      <resource>
+        <directory>${basedir}/../</directory>
+        <targetPath>META-INF</targetPath>
+        <includes>
+          <include>LICENSE.txt</include>
+          <include>NOTICE.txt</include>
+        </includes>
+      </resource>
+    </resources>
+
+    <plugins>
+      <!-- generates the bin launchers -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>appassembler-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>assemble</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <programs>
+            <program>
+              <mainClass>org.apache.any23.cli.ToolRunner</mainClass>
+              <name>any23</name>
+            </program>
+          </programs>
+          <configurationDirectory>conf</configurationDirectory>
+          <configurationSourceDirectory>${basedir}/src/test/resources</configurationSourceDirectory>
+          <copyConfigurationDirectory>true</copyConfigurationDirectory>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <profiles>
+    <profile>
+      <id>release</id>
+      <build>
+        <resources>
+          <resource>
+            <directory>${basedir}/../</directory>
+            <targetPath>${project.build.directory}/apidocs/META-INF</targetPath>
+            <includes>
+              <include>LICENSE.txt</include>
+              <include>NOTICE.txt</include>
+            </includes>
+          </resource>
+        </resources>
+      </build>
+    </profile>
+  </profiles>
+
+</project>

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
new file mode 100644
index 0000000..9a0410b
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
+import org.apache.any23.extractor.ExampleInputOutput;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.Extractor;
+import org.apache.any23.extractor.ExtractorRegistryImpl;
+import org.apache.any23.extractor.Extractor.BlindExtractor;
+import org.apache.any23.extractor.Extractor.ContentExtractor;
+import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.ExtractorRegistry;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Command-line tool that prints documentation about the available
+ * metadata extractors: their names, example inputs and example outputs.
+ */
+@Parameters( commandNames = { "extractor" }, commandDescription= "Utility for obtaining documentation about metadata extractors.")
+public class ExtractorDocumentation implements Tool {
+
+    @Parameter( names = { "-l", "--list" }, description = "shows the names of all available extractors" )
+    private boolean showList;
+
+    @Parameter( names = { "-i", "--input" }, description = "shows example input for the given extractor" )
+    private boolean showInput;
+
+    // "--outut" was the original (misspelled) long option; it is kept as an alias
+    // so that existing invocations keep working alongside the corrected "--output".
+    @Parameter( names = { "-o", "--output", "--outut" }, description = "shows example output for the given extractor" )
+    private boolean showOutput;
+
+    @Parameter( names = { "-a", "--all" }, description = "shows a report about all available extractors" )
+    private boolean showAll;
+
+    @Parameter( arity = 1, description = "Extractor name" )
+    private List<String> extractor = new LinkedList<String>();
+
+    /**
+     * Executes the first selected action, checked in this order:
+     * list, example input, example output, full report.
+     * Nothing is printed when none of the options has been selected.
+     *
+     * @throws IllegalArgumentException if <code>-i</code> or <code>-o</code>
+     *         is selected without an extractor name
+     * @throws Exception any error raised by the selected action
+     */
+    public void run() throws Exception {
+        if (showList) {
+            printExtractorList(ExtractorRegistryImpl.getInstance());
+        } else if (showInput) {
+            if (extractor.isEmpty()) {
+                throw new IllegalArgumentException("Required argument for -i: extractor name");
+            }
+
+            printExampleInput(extractor.get(0), ExtractorRegistryImpl.getInstance());
+        } else if (showOutput) {
+            if (extractor.isEmpty()) {
+                throw new IllegalArgumentException("Required argument for -o: extractor name");
+            }
+
+            printExampleOutput(extractor.get(0), ExtractorRegistryImpl.getInstance());
+        } else if (showAll) {
+            printReport(ExtractorRegistryImpl.getInstance());
+        }
+    }
+
+    /**
+     * Prints an error message on the standard error stream.
+     *
+     * @param msg the error message to be printed
+     */
+    public void printError(String msg) {
+        System.err.println(msg);
+    }
+
+    /**
+     * Prints the name and the label of every available extractor,
+     * one extractor per line.
+     *
+     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
+     * containing all extractors
+     */
+    public void printExtractorList(ExtractorRegistry registry) {
+        // Wildcard type instead of the raw type, as in the rest of this class.
+        for (ExtractorFactory<?> factory : registry.getExtractorGroup()) {
+            System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
+        }
+    }
+
+    /**
+     * Prints an example of input for the provided extractor.
+     *
+     * @param extractorName the name of the extractor
+     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
+     * containing all extractors
+     * @throws IOException propagated from {@link ExampleInputOutput} while
+     * obtaining the example input
+     * @throws IllegalArgumentException if no extractor is registered with that
+     * name, or if the extractor provides no example input
+     */
+    public void printExampleInput(String extractorName, ExtractorRegistry registry) throws IOException {
+        ExtractorFactory<?> factory = getFactory(registry, extractorName);
+        ExampleInputOutput example = new ExampleInputOutput(factory);
+        String input = example.getExampleInput();
+        if (input == null) {
+            throw new IllegalArgumentException("Extractor " + extractorName + " provides no example input");
+        }
+        System.out.println(input);
+    }
+
+    /**
+     * Prints an output example for the given extractor.
+     *
+     * @param extractorName the extractor name
+     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
+     * containing all extractors
+     * @throws IOException propagated from {@link ExampleInputOutput} while
+     * obtaining the example output
+     * @throws ExtractionException if there is an error during extraction
+     * @throws IllegalArgumentException if no extractor is registered with that
+     * name, or if the extractor provides no example output
+     */
+    public void printExampleOutput(String extractorName, ExtractorRegistry registry) throws IOException, ExtractionException {
+        ExtractorFactory<?> factory = getFactory(registry, extractorName);
+        ExampleInputOutput example = new ExampleInputOutput(factory);
+        String output = example.getExampleOutput();
+        if (output == null) {
+            throw new IllegalArgumentException("Extractor " + extractorName + " provides no example output");
+        }
+        System.out.println(output);
+    }
+
+    /**
+     * Prints a complete report on all the available extractors: for each
+     * registered name its type and, when an example input is available,
+     * the example input followed by the example output.
+     *
+     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
+     * containing all extractors
+     * @throws IOException propagated from {@link ExampleInputOutput} while
+     * obtaining an example input or output
+     * @throws ExtractionException if there is an error during extraction
+     */
+    public void printReport(ExtractorRegistry registry) throws IOException, ExtractionException {
+        for (String extractorName : registry.getAllNames()) {
+            ExtractorFactory<?> factory = registry.getFactory(extractorName);
+            ExampleInputOutput example = new ExampleInputOutput(factory);
+            System.out.println("Extractor: " + extractorName);
+            System.out.println("\ttype: " + getType(factory));
+            System.out.println();
+            final String exampleInput = example.getExampleInput();
+            if(exampleInput == null) {
+                System.out.println("(No Example Available)");
+            } else {
+                System.out.println("-------- Example Input  --------");
+                System.out.println(exampleInput);
+                System.out.println("-------- Example Output --------");
+                String output = example.getExampleOutput();
+                // A null or whitespace-only output is reported with a placeholder.
+                System.out.println(output == null || output.trim().length() == 0 ? "(No Output Generated)" : output);
+            }
+            System.out.println("================================");
+            System.out.println();
+        }
+    }
+
+    /**
+     * Looks up the factory registered under the given name.
+     *
+     * @param registry the registry to query
+     * @param name the extractor name
+     * @return the factory registered with that name
+     * @throws IllegalArgumentException if the name is not registered
+     */
+    private ExtractorFactory<?> getFactory(ExtractorRegistry registry, String name) {
+        if (!registry.isRegisteredName(name)) {
+            throw new IllegalArgumentException("Unknown extractor name: " + name);
+        }
+        return registry.getFactory(name);
+    }
+
+    /**
+     * Creates an extractor through the factory and returns the simple name of
+     * the first matching extractor kind, tested in the order
+     * {@link BlindExtractor}, {@link TagSoupDOMExtractor}, {@link ContentExtractor}.
+     *
+     * @param factory the factory used to create the extractor to inspect
+     * @return the simple class name of the matching kind, or <code>"?"</code>
+     * when none of them matches
+     */
+    private String getType(ExtractorFactory<?> factory) {
+        Extractor<?> extractor = factory.createExtractor();
+        if (extractor instanceof BlindExtractor) {
+            return BlindExtractor.class.getSimpleName();
+        }
+        if (extractor instanceof TagSoupDOMExtractor) {
+            return TagSoupDOMExtractor.class.getSimpleName();
+        }
+        if (extractor instanceof ContentExtractor) {
+            return ContentExtractor.class.getSimpleName();
+        }
+        return "?";
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/MicrodataParser.java b/cli/src/main/java/org/apache/any23/cli/MicrodataParser.java
new file mode 100644
index 0000000..19c59bf
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/MicrodataParser.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.beust.jcommander.Parameters;
+import org.apache.any23.extractor.html.TagSoupParser;
+import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.source.DocumentSource;
+import org.apache.any23.source.FileDocumentSource;
+import org.apache.any23.source.HTTPDocumentSource;
+import org.apache.any23.util.StreamUtils;
+
+import java.io.File;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Command line <i>Microdata</i> parser, accepting both files and URLs and
+ * returing a <i>JSON</i> representation of the extracted metadata as described at
+ * <a href="http://www.w3.org/TR/microdata/#json">Microdata JSON Specification</a>.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+@Parameters( commandNames = { "microdata" },  commandDescription = "Commandline Tool for extracting Microdata from file/HTTP source.")
+public class MicrodataParser implements Tool {
+
+    private static final Pattern HTTP_DOCUMENT_PATTERN = Pattern.compile("^https?://.*");
+
+    private static final Pattern FILE_DOCUMENT_PATTERN = Pattern.compile("^file:(.*)$");
+
+    @Parameter(
+       arity = 1,
+       description = "Input document URL, {http://path/to/resource.html|file:/path/to/localFile.html}",
+       converter = MicrodataParserDocumentSourceConverter.class
+    )
+    private List<DocumentSource> document = new LinkedList<DocumentSource>();
+
+    public void run() throws Exception {
+        if (document.isEmpty()) {
+            throw new IllegalArgumentException("No input document URL specified");
+        }
+        InputStream documentInputInputStream = null;
+        try {
+            final DocumentSource documentSource = document.get(0);
+            documentInputInputStream = documentSource.openInputStream();
+            final TagSoupParser tagSoupParser = new TagSoupParser(
+                    documentInputInputStream,
+                    documentSource.getDocumentIRI()
+            );
+            org.apache.any23.extractor.microdata.MicrodataParser.getMicrodataAsJSON(tagSoupParser.getDOM(), System.out);
+        } finally {
+            if (documentInputInputStream != null) StreamUtils.closeGracefully(documentInputInputStream);
+        }
+    }
+
+    public static final class MicrodataParserDocumentSourceConverter implements IStringConverter<DocumentSource> {
+
+        @Override
+        public DocumentSource convert( String value ) {
+            final Matcher httpMatcher = HTTP_DOCUMENT_PATTERN.matcher(value);
+            if (httpMatcher.find()) {
+                try {
+                    return new HTTPDocumentSource(DefaultHTTPClient.createInitializedHTTPClient(), value);
+                } catch ( URISyntaxException e ) {
+                    throw new ParameterException("Invalid source IRI: '" + value + "'");
+                }
+            }
+            final Matcher fileMatcher = FILE_DOCUMENT_PATTERN.matcher(value);
+            if (fileMatcher.find()) {
+                return new FileDocumentSource( new File( fileMatcher.group(1) ) );
+            }
+            throw new ParameterException("Invalid source protocol: '" + value + "'");
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/MimeDetector.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/MimeDetector.java b/cli/src/main/java/org/apache/any23/cli/MimeDetector.java
new file mode 100644
index 0000000..c9072cb
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/MimeDetector.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
+import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.http.DefaultHTTPClientConfiguration;
+import org.apache.any23.http.HTTPClient;
+import org.apache.any23.mime.MIMEType;
+import org.apache.any23.mime.MIMETypeDetector;
+import org.apache.any23.mime.TikaMIMETypeDetector;
+import org.apache.any23.source.DocumentSource;
+import org.apache.any23.source.FileDocumentSource;
+import org.apache.any23.source.HTTPDocumentSource;
+import org.apache.any23.source.StringDocumentSource;
+
+import java.io.File;
+import java.net.URISyntaxException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Commandline tool to detect <b>MIME Type</b>s from
+ * file, HTTP and direct input sources.
+ * The implementation of this tool is based on {@link org.apache.any23.mime.TikaMIMETypeDetector}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+@Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.")
+public class MimeDetector implements Tool{
+
+    public static final String FILE_DOCUMENT_PREFIX   = "file://";
+
+    public static final String INLINE_DOCUMENT_PREFIX = "inline://";
+
+    public static final String URL_DOCUMENT_RE        = "^https?://.*";
+
+    @Parameter(
+       arity = 1,
+       description = "Input document URL, {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}",
+       converter = MimeDetectorDocumentSourceConverter.class
+    )
+    private List<DocumentSource> document = new LinkedList<DocumentSource>();
+
+    public void run() throws Exception {
+        if (document.isEmpty()) {
+            throw new IllegalArgumentException("No input document URL specified");
+        }
+
+        final DocumentSource documentSource = document.get(0);
+        final MIMETypeDetector detector = new TikaMIMETypeDetector();
+        final MIMEType mimeType = detector.guessMIMEType(
+                documentSource.getDocumentIRI(),
+                documentSource.openInputStream(),
+                MIMEType.parse(documentSource.getContentType())
+        );
+        System.out.println(mimeType);
+    }
+
+    public static final class MimeDetectorDocumentSourceConverter implements IStringConverter<DocumentSource> {
+
+        @Override
+        public DocumentSource convert( String document ) {
+            if (document.startsWith(FILE_DOCUMENT_PREFIX)) {
+                return new FileDocumentSource( new File( document.substring(FILE_DOCUMENT_PREFIX.length()) ) );
+            }
+            if (document.startsWith(INLINE_DOCUMENT_PREFIX)) {
+                return new StringDocumentSource( document.substring(INLINE_DOCUMENT_PREFIX.length()), "" );
+            }
+            if (document.matches(URL_DOCUMENT_RE)) {
+                final HTTPClient client = new DefaultHTTPClient();
+                client.init( DefaultHTTPClientConfiguration.singleton() );
+                try {
+                    return new HTTPDocumentSource(client, document);
+                } catch ( URISyntaxException e ) {
+                    throw new IllegalArgumentException("Invalid source IRI: '" + document + "'");
+                }
+            }
+            throw new IllegalArgumentException("Unsupported protocol for document " + document);
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
new file mode 100644
index 0000000..a747b49
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/PluginVerifier.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
+import com.beust.jcommander.converters.FileConverter;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.mime.MIMEType;
+import org.apache.any23.plugin.Any23PluginManager;
+import org.apache.any23.plugin.Author;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Command-line tool that loads the <b>Any23</b> plugins found in a directory
+ * and prints author, factory class and supported MIME types of every
+ * registered extractor.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+@Parameters(commandNames = { "verify" }, commandDescription = "Utility for plugin management verification.")
+public class PluginVerifier implements Tool {
+
+    private Any23PluginManager pluginManager = Any23PluginManager.getInstance();
+
+    @Parameter(
+        description = "plugins-dir",
+        converter = FileConverter.class
+    )
+    private List<File> pluginsDirs = new LinkedList<File>();
+
+    /**
+     * Loads the JARs of the first given directory and reports every extractor
+     * known to the plugin manager on standard output.
+     *
+     * @throws IllegalArgumentException if no directory was given or the first
+     *         argument is not a directory.
+     */
+    public void run() throws Exception {
+        if (pluginsDirs.isEmpty()) {
+            throw new IllegalArgumentException("No plugin directory specified.");
+        }
+
+        // only the first positional argument is taken into account
+        final File targetDir = pluginsDirs.get(0);
+        if (!targetDir.isDirectory()) {
+            throw new IllegalArgumentException("<plugins-dir> must be a valid dir.");
+        }
+
+        pluginManager.loadJARDir(targetDir);
+
+        for (Iterator<ExtractorFactory> factories = pluginManager.getExtractors(); factories.hasNext(); ) {
+            printPluginData(factories.next(), System.out);
+            System.out.println("------------------------------------------------------------------------");
+        }
+    }
+
+    /**
+     * Renders the given MIME types as a single string, each one followed by a blank.
+     */
+    private String getMimeTypesStr(Collection<MIMEType> mimeTypes) {
+        final StringBuilder rendered = new StringBuilder();
+        for (MIMEType mimeType : mimeTypes) {
+            rendered.append(mimeType);
+            rendered.append(' ');
+        }
+        return rendered.toString();
+    }
+
+    /**
+     * Writes the description of a single extractor factory to the given stream.
+     */
+    private void printPluginData(ExtractorFactory extractorFactory, PrintStream ps) {
+        final Class<?> factoryClass = extractorFactory.getClass();
+        final Author author = factoryClass.getAnnotation(Author.class);
+
+        // factories without an @Author annotation are reported as unknown
+        final String authorName;
+        if (author == null) {
+            authorName = "<unknown>";
+        } else {
+            authorName = author.name();
+        }
+
+        ps.printf("Plugin author    : %s\n", authorName);
+        ps.printf("Plugin factory   : %s\n", factoryClass);
+        ps.printf("Plugin mime-types: %s\n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() ));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/Rover.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/Rover.java b/cli/src/main/java/org/apache/any23/cli/Rover.java
new file mode 100644
index 0000000..26a8663
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/Rover.java
@@ -0,0 +1,265 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.beust.jcommander.Parameters;
+import com.beust.jcommander.converters.FileConverter;
+import org.apache.any23.Any23;
+import org.apache.any23.configuration.Configuration;
+import org.apache.any23.configuration.DefaultConfiguration;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
+import org.apache.any23.filter.IgnoreAccidentalRDFa;
+import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
+import org.apache.any23.source.DocumentSource;
+import org.apache.any23.writer.BenchmarkTripleHandler;
+import org.apache.any23.writer.LoggingTripleHandler;
+import org.apache.any23.writer.ReportingTripleHandler;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.apache.any23.writer.WriterFactoryRegistry;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.LinkedList;
+import java.util.List;
+
+import static java.lang.String.format;
+
+/**
+ * A default rover implementation. Fetches a URL using a hint as to what
+ * format it should expect, then tries to convert the content to RDF.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ * @author Richard Cyganiak (richard@cyganiak.de)
+ * @author Gabriele Renzi
+ */
+@Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.")
+public class Rover implements Tool {
+
+    // Identifiers of the writers known to the WriterFactoryRegistry; listed in the error
+    // raised by configure() when the requested output format cannot be instantiated.
+    private static final List<String> FORMATS = WriterFactoryRegistry.getInstance().getIdentifiers();
+
+    // Position within FORMATS of the format used when -f/--format is not given.
+    private static final int DEFAULT_FORMAT_INDEX = 0;
+
+    private static final Logger logger = LoggerFactory.getLogger(Rover.class);
+
+    // Destination of the serialized output; a file-backed stream when -o/--output is given.
+    @Parameter(
+       names = { "-o", "--output" },
+       description = "Specify Output file (defaults to standard output)",
+       converter = PrintStreamConverter.class
+    )
+    private PrintStream outputStream = System.out;
+
+    // Positional arguments, already normalized to IRI strings by ArgumentToIRIConverter.
+    @Parameter(description = "input IRIs {<url>|<file>}+", converter = ArgumentToIRIConverter.class)
+    protected List<String> inputIRIs = new LinkedList<String>();
+
+    // When empty, configure() creates Any23 with its no-argument constructor.
+    @Parameter(names = { "-e", "--extractors" }, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle")
+    private List<String> extractors = new LinkedList<String>();
+
+    @Parameter(names = { "-f", "--format" }, description = "the output format")
+    private String format = FORMATS.get(DEFAULT_FORMAT_INDEX);
+
+    // When set, configure() wraps the writer in a LoggingTripleHandler writing to this file.
+    @Parameter(
+       names = { "-l", "--log" },
+       description = "Produce log within a file.",
+       converter = FileConverter.class
+    )
+    private File logFile = null;
+
+    // When set, configure() installs a BenchmarkTripleHandler whose report run() prints.
+    @Parameter(names = { "-s", "--stats" }, description = "Print out extraction statistics.")
+    private boolean statistics;
+
+    @Parameter(names = { "-t", "--notrivial" }, description = "Filter trivial statements (e.g. CSS related ones).")
+    private boolean noTrivial;
+
+    // Selects ValidationMode.ValidateAndFix instead of ValidationMode.None in configure().
+    @Parameter(names = { "-p", "--pedantic" }, description = "Validate and fixes HTML content detecting commons issues.")
+    private boolean pedantic;
+
+    @Parameter(names = { "-n", "--nesting" }, description = "Disable production of nesting triples.")
+    private boolean nestingDisabled;
+
+    @Parameter(names = { "-d", "--defaultns" }, description = "Override the default namespace used to produce statements.")
+    private String defaultns;
+
+    // non parameters: everything below is assigned by configure()
+
+    private TripleHandler tripleHandler;
+
+    private ReportingTripleHandler reportingTripleHandler;
+
+    // Stays null unless statistics were requested.
+    private BenchmarkTripleHandler benchmarkTripleHandler;
+
+    private Any23 any23;
+
+    private ExtractionParameters extractionParameters;
+
+    protected void configure() {
+        try {
+            tripleHandler = WriterFactoryRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream);
+        } catch (Exception e) {
+            throw new NullPointerException(
+                    format("Invalid output format '%s', admitted values: %s",
+                        format,
+                        FORMATS
+                    )
+            );
+        }
+
+        if (logFile != null) {
+            try {
+                tripleHandler = new LoggingTripleHandler(tripleHandler, new PrintWriter(logFile));
+            } catch (FileNotFoundException fnfe) {
+                throw new IllegalArgumentException( format("Can not write to log file [%s]", logFile), fnfe );
+            }
+        }
+
+        if (statistics) {
+            benchmarkTripleHandler = new BenchmarkTripleHandler(tripleHandler);
+            tripleHandler = benchmarkTripleHandler;
+        }
+
+        if (noTrivial) {
+            tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(tripleHandler),
+                                                     true    // suppress stylesheet triples.
+                                                     );
+        }
+
+        reportingTripleHandler = new ReportingTripleHandler(tripleHandler);
+
+        final Configuration configuration = DefaultConfiguration.singleton();
+        extractionParameters =
+                pedantic
+                        ?
+                new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)
+                        :
+                new ExtractionParameters(configuration, ValidationMode.None          , nestingDisabled);
+        if (defaultns != null) {
+            extractionParameters.setProperty(ExtractionParameters.EXTRACTION_CONTEXT_IRI_PROPERTY,
+                                             defaultns);
+        }
+
+        any23 = (extractors.isEmpty()) ? new Any23()
+                                                   : new Any23(extractors.toArray(new String[extractors.size()]));
+        any23.setHTTPUserAgent(Any23.DEFAULT_HTTP_CLIENT_USER_AGENT + "/" + Any23.VERSION);
+    }
+
+    /**
+     * Collects the reports of the benchmark and reporting handlers, when present.
+     *
+     * @return the available reports, each one terminated by a newline.
+     */
+    protected String printReports() {
+        final StringBuilder reports = new StringBuilder();
+        if (benchmarkTripleHandler != null) {
+            reports.append( benchmarkTripleHandler.report() );
+            reports.append('\n');
+        }
+        if (reportingTripleHandler != null) {
+            reports.append( reportingTripleHandler.printReport() );
+            reports.append('\n');
+        }
+        return reports.toString();
+    }
+
+    /**
+     * Runs the extraction on a single document.
+     * <p>
+     * Triples are sent to the reporting handler created by {@link #configure()}, which wraps
+     * the complete handler chain. Feeding the chain directly would bypass the reporting
+     * handler, leaving the extractor names and triple count read after the extraction empty.
+     * NOTE(review): this assumes ReportingTripleHandler forwards every event to the handler
+     * it wraps - confirm.
+     *
+     * @param documentSource the document to process.
+     * @throws IllegalStateException if no extractor matches the document.
+     * @throws Exception if the extraction fails.
+     */
+    protected void performExtraction(DocumentSource documentSource) throws Exception {
+        if (!any23.extract(extractionParameters, documentSource, reportingTripleHandler).hasMatchingExtractors()) {
+            throw new IllegalStateException(format("No suitable extractors found for source %s", documentSource));
+        }
+    }
+
+    /**
+     * Closes the triple handler chain and then the output stream.
+     * <p>
+     * The output stream is released even when closing the handler fails, so a
+     * file opened through <code>-o</code> never stays open.
+     *
+     * @throws RuntimeException if the triple handler cannot be closed.
+     */
+    protected void close() {
+        try {
+            if (tripleHandler != null) {
+                tripleHandler.close();
+            }
+        } catch (TripleHandlerException the) {
+            throw new RuntimeException("Error while closing TripleHandler", the);
+        } finally {
+            if (outputStream != null && outputStream != System.out) { // TODO: low - find better solution to avoid closing system out.
+                outputStream.close();
+            }
+        }
+    }
+
+    /**
+     * Configures the tool, extracts every input IRI in the given order and logs
+     * the extractors used, the number of triples and the elapsed time.
+     * The handlers and the output stream are closed in any case.
+     *
+     * @throws IllegalArgumentException if no input IRI was given.
+     */
+    public void run() throws Exception {
+        if (inputIRIs.isEmpty()) {
+            throw new IllegalArgumentException("Expected at least 1 argument.");
+        }
+
+        configure();
+
+        try {
+            final long startedAt = System.currentTimeMillis();
+            for (final String iri : inputIRIs) {
+                performExtraction( any23.createDocumentSource(iri) );
+            }
+            final long durationMillis = System.currentTimeMillis() - startedAt;
+
+            // only available when statistics were requested
+            if (benchmarkTripleHandler != null) {
+                System.err.println(benchmarkTripleHandler.report());
+            }
+
+            logger.info("Extractors used: " + reportingTripleHandler.getExtractorNames());
+            logger.info(reportingTripleHandler.getTotalTriples() + " triples, " + durationMillis + "ms");
+        } finally {
+            close();
+        }
+    }
+
+    /**
+     * Normalizes an input argument to an IRI string: values with an
+     * <code>http:</code> or <code>https:</code> scheme are validated as URLs,
+     * anything else must name an existing, non-directory file.
+     */
+    public static final class ArgumentToIRIConverter implements IStringConverter<String> {
+
+        /**
+         * @param uri the raw argument; surrounding whitespace is ignored.
+         * @return the URL as given, or the <code>file:</code> URI of the local file.
+         * @throws ParameterException if the URL is malformed, the file does not
+         *         exist or the path denotes a directory.
+         */
+        @Override
+        public String convert(String uri) {
+            final String candidate = uri.trim();
+            final String lowered = candidate.toLowerCase();
+
+            if (!lowered.startsWith("http:") && !lowered.startsWith("https:")) {
+                // not a web address: treat the argument as a local path
+                final File local = new File(candidate);
+                if (!local.exists()) {
+                    throw new ParameterException(format("No such file: [%s]", local.getAbsolutePath()));
+                }
+                if (local.isDirectory()) {
+                    throw new ParameterException(format("Found a directory: [%s]", local.getAbsolutePath()));
+                }
+                return local.toURI().toString();
+            }
+
+            try {
+                return new URL(candidate).toString();
+            } catch (MalformedURLException murle) {
+                throw new ParameterException(format("Invalid IRI: '%s': %s", candidate, murle.getMessage()));
+            }
+        }
+
+    }
+
+    /**
+     * Opens the file named by the <code>-o</code> option as a {@link PrintStream}.
+     */
+    public static final class PrintStreamConverter implements IStringConverter<PrintStream> {
+
+        /**
+         * @param value path of the output file.
+         * @return a stream writing to that file.
+         * @throws ParameterException if the file cannot be opened.
+         */
+        @Override
+        public PrintStream convert( String value ) {
+            final File target = new File(value);
+            final PrintStream stream;
+            try {
+                stream = new PrintStream(target);
+            } catch (FileNotFoundException fnfe) {
+                throw new ParameterException(format("Cannot open file '%s': %s", target, fnfe.getMessage()));
+            }
+            return stream;
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/ToolRunner.java b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
new file mode 100644
index 0000000..90daeb3
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/ToolRunner.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.converters.FileConverter;
+import org.apache.any23.Any23;
+import org.apache.any23.plugin.Any23PluginManager;
+import org.apache.any23.util.LogUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Properties;
+
+import static java.lang.System.currentTimeMillis;
+import static java.lang.System.exit;
+
+/**
+ * This is the main class, responsible for providing a uniform command-line
+ * access point to all the other tools, such as {@link Rover}.
+ *
+ * @see ExtractorDocumentation
+ * @see Rover
+ */
+public final class ToolRunner {
+
+    // Default location of the plugins: the .any23/plugins directory under the user home.
+    public static final File DEFAULT_PLUGIN_DIR = new File(new File(System.getProperty("user.home")), ".any23/plugins");
+
+    // Banner, status and error output goes to standard error.
+    private static final PrintStream infoStream = System.err;
+
+    @Parameter( names = { "-h", "--help" }, description = "Display help information." )
+    private boolean printHelp;
+
+    @Parameter( names = { "-v", "--version" }, description = "Display version information." )
+    private boolean showVersion;
+
+    // Selects verbose logging and full stack traces on failure in execute().
+    @Parameter( names = { "-X", "--verbose" }, description = "Produce execution verbose output." )
+    private boolean verbose;
+
+    // Also overwritten in execute() with the value pre-parsed by parsePluginDirOption(),
+    // because the tools must be loaded before the arguments are parsed by JCommander.
+    @Parameter(
+            names = { "--plugins-dir" },
+            description = "The Any23 plugins directory.",
+            converter = FileConverter.class
+    )
+    private File pluginsDir = DEFAULT_PLUGIN_DIR;
+
+    /**
+     * Command-line entry point: runs the requested tool and terminates the JVM
+     * with the resulting exit code.
+     */
+    public static void main( String[] args ) throws Exception {
+        final int exitCode = new ToolRunner().execute( args );
+        exit( exitCode );
+    }
+
+    public int execute(String...args) throws Exception {
+        JCommander commander = new JCommander(this);
+        commander.setProgramName(System.getProperty("app.name"));
+
+        // TODO (low) : this dirty solution has been introduced because it is not possible to
+        //              parse arguments ( commander.parse() ) twice.
+        final File pluginsDirOption;
+        try {
+            pluginsDirOption = parsePluginDirOption(args);
+        } catch (Exception e) {
+            System.err.println(e.getMessage());
+            return 1;
+        }
+        if(pluginsDirOption != null) {
+            pluginsDir = pluginsDirOption;
+        }
+
+        // add all plugins first
+        final Iterator<Tool> tools = getToolsInClasspath();
+        while (tools.hasNext()) {
+            Tool tool = tools.next();
+            commander.addCommand(tool);
+        }
+
+        commander.parse(args);
+
+        Map<String, JCommander> commands = commander.getCommands();
+        String parsedCommand = commander.getParsedCommand();
+
+        if (printHelp) {
+            commander.usage();
+            return 0;
+        }
+
+        if (showVersion) {
+            printVersionInfo();
+            return 0;
+        }
+
+        if(parsedCommand == null) {
+            infoStream.println("A command must be specified.");
+            commander.usage();
+            return 1;
+        }
+
+        if (verbose) {
+            LogUtils.setVerboseLogging();
+        } else {
+            LogUtils.setDefaultLogging();
+        }
+
+        long start = currentTimeMillis();
+        int exit = 0;
+
+        Throwable error = null;
+
+        // execute the parsed command
+        infoStream.println();
+        infoStream.println( "------------------------------------------------------------------------" );
+        infoStream.printf( "Apache Any23 :: %s%n", parsedCommand );
+        infoStream.println( "------------------------------------------------------------------------" );
+        infoStream.println();
+
+        try {
+            Tool.class.cast( commands.get( parsedCommand ).getObjects().get( 0 ) ).run();
+        } catch (Throwable t) {
+            exit = 1;
+            error = t;
+        } finally {
+            infoStream.println();
+            infoStream.println( "------------------------------------------------------------------------" );
+            infoStream.printf( "Apache Any23 %s%n", ( exit != 0 ) ? "FAILURE" : "SUCCESS" );
+
+            if (exit != 0) {
+                infoStream.println();
+
+                if (verbose) {
+                    System.err.println( "Execution terminated with errors:" );
+                    error.printStackTrace(infoStream);
+                } else {
+                    infoStream.printf( "Execution terminated with errors: %s%n", error.getMessage() );
+                }
+
+                infoStream.println();
+            }
+
+            infoStream.printf( "Total time: %ss%n", ( ( currentTimeMillis() - start ) / 1000 ) );
+            infoStream.printf( "Finished at: %s%n", new Date() );
+
+            final Runtime runtime = Runtime.getRuntime();
+            final int megaUnit = 1024 * 1024;
+            infoStream.printf( "Final Memory: %sM/%sM%n", ( runtime.totalMemory() - runtime.freeMemory() ) / megaUnit,
+                         runtime.totalMemory() / megaUnit );
+
+            infoStream.println( "------------------------------------------------------------------------" );
+        }
+
+        return exit;
+    }
+
+    /**
+     * Returns the tools known to the plugin manager, after loading the JARs of the
+     * plugins directory when that directory is present.
+     *
+     * @return an iterator over the available tools.
+     * @throws IOException declared for the plugin manager calls made here.
+     */
+    Iterator<Tool> getToolsInClasspath() throws IOException {
+        final Any23PluginManager manager = Any23PluginManager.getInstance();
+
+        // a missing plugins directory is not an error: nothing extra is loaded
+        final boolean pluginsAvailable = pluginsDir.exists() && pluginsDir.isDirectory();
+        if (pluginsAvailable) {
+            manager.loadJARDir(pluginsDir);
+        }
+
+        return manager.getTools();
+    }
+
+    /**
+     * Prints the Any23 version together with Java, locale and operating system details.
+     * <p>
+     * The version comes from {@link Any23#VERSION}. The former lookup of the Maven
+     * <code>pom.properties</code> resource has been removed: its content was loaded
+     * but never read.
+     */
+    private static void printVersionInfo() {
+        infoStream.printf( "Apache Any23 %s%n", Any23.VERSION );
+        infoStream.printf( "Java version: %s, vendor: %s%n",
+                           System.getProperty( "java.version" ),
+                           System.getProperty( "java.vendor" ) );
+        infoStream.printf( "Java home: %s%n", System.getProperty( "java.home" ) );
+        infoStream.printf( "Default locale: %s_%s, platform encoding: %s%n",
+                           System.getProperty( "user.language" ),
+                           System.getProperty( "user.country" ),
+                           System.getProperty( "sun.jnu.encoding" ) );
+        infoStream.printf( "OS name: \"%s\", version: \"%s\", arch: \"%s\", family: \"%s\"%n",
+                           System.getProperty( "os.name" ),
+                           System.getProperty( "os.version" ),
+                           System.getProperty( "os.arch" ),
+                           getOsFamily() );
+    }
+
+    /**
+     * Classifies the running operating system from the <code>os.name</code> and
+     * <code>path.separator</code> system properties.
+     * <p>
+     * The checks are ordered: explicit names come first, the path separator is only
+     * used as a fallback (<code>;</code> for "dos", <code>:</code> for "unix").
+     *
+     * @return the family name, or <code>"undefined"</code> when nothing matches.
+     */
+    private static final String getOsFamily() {
+        // Lower-case with a fixed locale: the default locale may map characters
+        // differently (e.g. Turkish dotless i) and break the comparisons below.
+        String osName = System.getProperty( "os.name" ).toLowerCase( java.util.Locale.ROOT );
+        String pathSep = System.getProperty( "path.separator" );
+
+        if (osName.contains("windows")) {
+            return "windows";
+        } else if (osName.contains("os/2")) {
+            return "os/2";
+        } else if (osName.contains("z/os") || osName.contains("os/390")) {
+            return "z/os";
+        } else if (osName.contains("os/400")) {
+            return "os/400";
+        } else if (pathSep.equals( ";" )) {
+            return "dos";
+        } else if (osName.contains("mac")) {
+            if (osName.endsWith("x")) {
+                return "mac"; // MACOSX
+            }
+            return "unix";
+        } else if (osName.contains("nonstop_kernel")) {
+            return "tandem";
+        } else if (osName.contains("openvms")) {
+            return "openvms";
+        } else if (pathSep.equals(":")) {
+            return "unix";
+        }
+
+        return "undefined";
+    }
+
+    /**
+     * Looks up the value of the <code>--plugins-dir</code> option directly in the raw
+     * arguments. When the option is repeated, the last occurrence is the one used.
+     *
+     * @param args the raw command-line arguments.
+     * @return the requested plugins directory, or <code>null</code> if the option is absent.
+     * @throws IllegalArgumentException if the option has no value or the value is
+     *         not a directory.
+     */
+    private static File parsePluginDirOption(String[] args) {
+        // walk backwards so that the search stops at the last occurrence
+        int position = args.length - 1;
+        while (position >= 0 && !"--plugins-dir".equals(args[position])) {
+            position--;
+        }
+        if (position < 0) {
+            return null;
+        }
+
+        final int valuePosition = position + 1;
+        if (valuePosition == args.length) {
+            throw new IllegalArgumentException("Missing argument for --plugins-dir option.");
+        }
+
+        final File candidate = new File( args[valuePosition] );
+        if (candidate.isDirectory()) {
+            return candidate;
+        }
+        throw new IllegalArgumentException("Expected a directory for --plugins-dir option value.");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/VocabPrinter.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/VocabPrinter.java b/cli/src/main/java/org/apache/any23/cli/VocabPrinter.java
new file mode 100644
index 0000000..7fde887
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/VocabPrinter.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.apache.any23.vocab.RDFSchemaUtils;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFWriterRegistry;
+import org.eclipse.rdf4j.rio.Rio;
+
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
+
+/**
+ * Prints out the <i>RDFSchema</i> of the vocabularies to standard output,
+ * as <i>NQuads</i> unless another format is requested with <code>-f</code>.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+@Parameters(commandNames = { "vocab" }, commandDescription = "Prints out the RDF Schema of the vocabularies used by Any23.")
+public class VocabPrinter implements Tool {
+
+    @Parameter(names = { "-f", "--format" }, description = "Vocabulary output format", converter = RDFFormatConverter.class)
+    private RDFFormat format = RDFFormat.NQUADS;
+
+    /**
+     * Serializes the vocabularies in the selected format.
+     */
+    public void run() throws Exception {
+        RDFSchemaUtils.serializeVocabularies(format, System.out);
+    }
+
+    /**
+     * Resolves the value of the format option, looked up as a MIME type in the
+     * {@link RDFWriterRegistry}.
+     */
+    public static final class RDFFormatConverter implements IStringConverter<RDFFormat> {
+
+        /**
+         * @param value the MIME type given on the command line.
+         * @return the matching format; when none is registered, the exception
+         *         supplied by {@link Rio#unsupportedFormat(String)} is thrown.
+         */
+        @Override
+        public RDFFormat convert(String value) {
+            return RDFWriterRegistry.getInstance()
+                    .getFileFormatForMIMEType(value)
+                    .orElseThrow(Rio.unsupportedFormat(value));
+        }
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/main/java/org/apache/any23/cli/package-info.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/package-info.java b/cli/src/main/java/org/apache/any23/cli/package-info.java
new file mode 100644
index 0000000..40ae928
--- /dev/null
+++ b/cli/src/main/java/org/apache/any23/cli/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package contains command-line utilities that give users access
+ * to the main <i>Any23</i> features from a <i>command-line</i> shell.
+ */
+package org.apache.any23.cli;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
new file mode 100644
index 0000000..98616ba
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.junit.Test;
+
+/**
+ * Runs the {@link ExtractorDocumentation} CLI with each of its options
+ * through the <code>runToolCheckExit0</code> helper of the base class.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ExtractorDocumentationTest extends ToolTestBase {
+
+    /** Extractor whose example input and output are requested by the tests. */
+    private static final String EXTRACTOR_UNDER_TEST = "html-microdata";
+
+    public ExtractorDocumentationTest() {
+        super(ExtractorDocumentation.class);
+    }
+
+    /** Runs the tool with the <code>--list</code> option. */
+    @Test
+    public void testList() throws Exception {
+        runToolCheckExit0("--list");
+    }
+
+    /** Runs the tool with the <code>--all</code> option. */
+    @Test
+    public void testAll() throws Exception {
+        runToolCheckExit0("--all");
+    }
+
+    /** Runs the tool with the <code>-i</code> option for the extractor under test. */
+    //@Ignore("no available example")
+    @Test
+    public void testExampleInput() throws Exception {
+        final String[] arguments = { "-i", EXTRACTOR_UNDER_TEST };
+        runToolCheckExit0(arguments);
+    }
+
+    /** Runs the tool with the <code>-o</code> option for the extractor under test. */
+    //@Ignore("no available example")
+    @Test
+    public void testExampleOutput() throws Exception {
+        final String[] arguments = { "-o", EXTRACTOR_UNDER_TEST };
+        runToolCheckExit0(arguments);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/MicrodataParserTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/MicrodataParserTest.java b/cli/src/test/java/org/apache/any23/cli/MicrodataParserTest.java
new file mode 100644
index 0000000..a80e729
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/MicrodataParserTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Test case for {@link MicrodataParser} CLI.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class MicrodataParserTest extends ToolTestBase {
+
+    public MicrodataParserTest() {
+        super(MicrodataParser.class);
+    }
+
+    /**
+     * Runs the tool on a temporary copy of a microdata test page, addressed as a <code>file:</code> URL.
+     */
+    @Test
+    public void testRunOnFile() throws Exception {
+        final String localCopyPath = copyResourceToTempFile("/microdata/microdata-nested.html").getAbsolutePath();
+        final String fileUrl = "file:" + localCopyPath;
+        runToolCheckExit0(fileUrl);
+    }
+
+    /**
+     * Runs the tool on a remote page; currently ignored, see the reason given in the annotation.
+     */
+    @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
+    @Test
+    public void testRunOnHTTPResource() throws Exception {
+        final String remotePage = "http://www.imdb.com/title/tt1375666/";
+        runToolCheckExit0(remotePage);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java b/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
new file mode 100644
index 0000000..3894d32
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.junit.Test;
+
+/**
+ * Test case for {@link MimeDetector} CLI.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class MimeDetectorTest extends ToolTestBase {
+
+    public MimeDetectorTest() {
+        super(MimeDetector.class);
+    }
+
+    /**
+     * Runs the tool on a remote HTTP document.
+     */
+    @Test
+    public void testDetectURL() throws Exception {
+        assumeOnlineAllowed();
+        final String remoteDocument = "http://twitter.com#micmos";
+        runToolCheckExit0(remoteDocument);
+    }
+
+    /**
+     * Runs the tool on a temporary copy of a TriX test resource addressed through the
+     * <code>file://</code> prefix.
+     */
+    @Test
+    public void testDetectFile() throws Exception {
+        // NOTE(review): the input is a local file, yet the test is guarded as an online test - confirm this is intended.
+        assumeOnlineAllowed();
+        final String trixCopyPath = copyResourceToTempFile("/application/trix/test1.trx").getAbsolutePath();
+        runToolCheckExit0("file://" + trixCopyPath);
+    }
+
+    /**
+     * Runs the tool on content given directly on the command line through the
+     * <code>inline://</code> prefix.
+     */
+    @Test
+    public void testDetectInline() throws Exception {
+        // NOTE(review): the input is inline content, yet the test is guarded as an online test - confirm this is intended.
+        assumeOnlineAllowed();
+        // Passed as one single argument: the inline content contains spaces and must not be split.
+        final String inlineDocument = "inline://<http://s> <http://p> <http://o> .";
+        runToolCheckExit0(inlineDocument);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/PluginVerifierTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/PluginVerifierTest.java b/cli/src/test/java/org/apache/any23/cli/PluginVerifierTest.java
new file mode 100644
index 0000000..bdee9ae
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/PluginVerifierTest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.junit.Test;
+
+/**
+ * Test case for {@link PluginVerifier} CLI.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class PluginVerifierTest extends ToolTestBase {
+
+    public PluginVerifierTest() {
+        super(PluginVerifier.class);
+    }
+
+    /**
+     * Runs the tool with <code>"."</code> as its only argument and expects exit code <code>0</code>.
+     */
+    @Test
+    public void testRun() throws Exception {
+        final String onlyArgument = ".";
+        runToolCheckExit0(onlyArgument);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/RoverTest.java b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
new file mode 100644
index 0000000..893220a
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.util.FileUtils;
+import org.apache.any23.util.StringUtils;
+import org.apache.any23.util.URLUtils;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.rio.RDFFormat;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Test case for {@link Rover}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+@Ignore("Twitter microdata not parsing correctly right now")
+public class RoverTest extends ToolTestBase {
+
+    private static final String[] TARGET_FILES = {
+        "/microdata/microdata-nested.html",
+        "/org/apache/any23/extractor/csv/test-semicolon.csv"
+    };
+
+    private static final String[] TARGET_URLS = {
+            "http://twitter.com/micmos",
+            "http://twitter.com/dpalmisano"
+    };
+
+    public RoverTest() {
+        super(Rover.class);
+    }
+
+    /**
+     * Runs the tool over temporary copies of every {@link #TARGET_FILES} entry in one invocation.
+     */
+    @Test
+    public void testRunMultiFiles() throws Exception {
+        final String[] copiedTargets = new String[TARGET_FILES.length];
+        for (int i = 0; i < TARGET_FILES.length; i++) {
+            copiedTargets[i] = copyResourceToTempFile(TARGET_FILES[i]).getAbsolutePath();
+        }
+
+        runWithMultiSourcesAndVerify(copiedTargets, 0);
+    }
+
+    /**
+     * Runs the tool with <code>-d DEFAULT_GRAPH</code> and checks that no line of the
+     * produced output contains that graph.
+     */
+    @Test
+    public void testRunWithDefaultNS() throws Exception {
+        final String DEFAULT_GRAPH = "http://test/default/ns";
+        final File outFile = File.createTempFile("rover-test", "out", tempDirectory);
+        // Arguments are passed one by one instead of as a space-joined string, so that
+        // temporary paths containing spaces are not split into several arguments.
+        final int exitCode = runTool(
+                "-o", outFile.getAbsolutePath(),
+                "-f", "nquads",
+                "-p",
+                "-n",
+                copyResourceToTempFile("/cli/rover-test1.nq").getAbsolutePath(),
+                "-d", DEFAULT_GRAPH
+        );
+
+        Assert.assertEquals("Unexpected exit code.", 0, exitCode);
+        Assert.assertTrue(outFile.exists());
+        final String fileContent = FileUtils.readFileContent(outFile);
+        final String[] lines = fileContent.split("\\n");
+        int graphCounter = 0;
+        for (String line : lines) {
+            if (line.contains(DEFAULT_GRAPH)) {
+                graphCounter++;
+            }
+        }
+        Assert.assertEquals(0, graphCounter);
+    }
+
+    /* BEGIN: online tests. */
+
+    /**
+     * Runs the tool over all {@link #TARGET_URLS}; the test is skipped unless every URL is reported online.
+     */
+    @Test
+    public void testRunMultiURLs() throws Exception {
+        // Assuming first accessibility to remote resources.
+        assumeOnlineAllowed();
+        for (String targetURL : TARGET_URLS) {
+            Assume.assumeTrue( URLUtils.isOnline(targetURL) );
+        }
+
+        runWithMultiSourcesAndVerify(TARGET_URLS, 0);
+    }
+
+    /**
+     * Runs the tool over the given sources, writing N-Quads output and a log to temporary files,
+     * then verifies the exit code, the number of log lines and a minimum number of statements.
+     *
+     * @param targets the sources handed to the tool, one argument each.
+     * @param expectedExit the exit code the tool is expected to return.
+     * @throws Exception if running the tool, reading the files or parsing the output fails.
+     */
+    private void runWithMultiSourcesAndVerify(String[] targets, int expectedExit) throws Exception {
+        final File outFile = File.createTempFile("rover-test", "out", tempDirectory);
+        final File logFile = File.createTempFile("rover-test", "log", tempDirectory);
+
+        // The argument list is built element by element instead of being joined and re-split
+        // on spaces, so that paths containing spaces survive as single arguments.
+        final List<String> args = new ArrayList<String>(Arrays.asList(
+                "-o", outFile.getAbsolutePath(),
+                "-f", "nquads",
+                "-l", logFile.getAbsolutePath(),
+                "-p",
+                "-n"
+        ));
+        args.addAll(Arrays.asList(targets));
+
+        final int exitCode = runTool(args.toArray(new String[args.size()]));
+        Assert.assertEquals("Unexpected exit code.", expectedExit, exitCode);
+
+        Assert.assertTrue(outFile.exists());
+        Assert.assertTrue(logFile.exists());
+
+        final String logFileContent = FileUtils.readFileContent(logFile);
+        Assert.assertEquals(
+                "Unexpected number of log lines.",
+                targets.length + 1,  // Header line.
+                StringUtils.countNL(logFileContent)
+        );
+
+        final String outNQuads = FileUtils.readFileContent(outFile);
+        final Statement[] statements = RDFUtils.parseRDF(RDFFormat.NQUADS, outNQuads);
+        System.out.println(Arrays.toString(statements));
+        Assert.assertTrue("Unexpected number of statements.", statements.length > 9);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
new file mode 100644
index 0000000..881a782
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test case for {@link ToolRunner}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class ToolRunnerTest {
+
+    /** Tools that <code>ToolRunner.getToolsInClasspath()</code> is expected to discover. */
+    private static final Set<Class<? extends Tool>> CORE_TOOLS = new HashSet<Class<? extends Tool>>();
+
+    static {
+        CORE_TOOLS.add(ExtractorDocumentation.class);
+        CORE_TOOLS.add(MicrodataParser.class);
+        CORE_TOOLS.add(MimeDetector.class);
+        CORE_TOOLS.add(PluginVerifier.class);
+        CORE_TOOLS.add(Rover.class);
+        CORE_TOOLS.add(VocabPrinter.class);
+    }
+
+    /**
+     * Checks tool discovery in both directions: every detected tool must be a core tool,
+     * and every core tool must have been detected.
+     */
+    @Test
+    public void testGetToolsInClasspath() throws IOException {
+        final Iterator<Tool> tools = new ToolRunner().getToolsInClasspath();
+        assertTrue("No core tools have been detected", tools.hasNext());
+
+        final Set<Class<? extends Tool>> missingTools = new HashSet<Class<? extends Tool>>(CORE_TOOLS);
+        while (tools.hasNext()) {
+            final Class<? extends Tool> detected = tools.next().getClass();
+            assertTrue("Unexpected tool detected: " + detected.getName(), CORE_TOOLS.contains(detected));
+            missingTools.remove(detected);
+        }
+        assertTrue("Some core tools have not been detected: " + missingTools, missingTools.isEmpty());
+    }
+
+    /**
+     * Runs the tool runner with <code>-v</code> and expects exit code <code>0</code>.
+     */
+    @Test
+    public void testGetVersion() throws Exception {
+        Assert.assertEquals(0, new ToolRunner().execute("-v") );
+    }
+
+    /**
+     * Runs the tool runner with <code>-h</code> and expects exit code <code>0</code>.
+     */
+    @Test
+    public void testGetHelp() throws Exception {
+        Assert.assertEquals(0, new ToolRunner().execute("-h") );
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/ToolTestBase.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ToolTestBase.java b/cli/src/test/java/org/apache/any23/cli/ToolTestBase.java
new file mode 100644
index 0000000..fef49cd
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/ToolTestBase.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import com.beust.jcommander.Parameters;
+import org.apache.any23.Any23OnlineTestBase;
+
+import java.util.Arrays;
+
+import static java.lang.String.format;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Base class for <i>CLI</i> related tests.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+// TODO: improve support for Tool testing, intercept i/o streams.
+public abstract class ToolTestBase extends Any23OnlineTestBase {
+
+    /** Method name constant kept for subclasses; it is not referenced within this class. */
+    public static final String TOOL_RUN_METHOD = "run";
+
+    private final Class<? extends Tool> toolClazz;
+
+    /**
+     * Creates a test base bound to the given tool class.
+     *
+     * @param tool the tool class under test, must not be <code>null</code>.
+     */
+    protected ToolTestBase(Class<? extends Tool> tool) {
+        if (tool == null) {
+            throw new NullPointerException("tool");
+        }
+        toolClazz = tool;
+    }
+
+    /**
+     * Runs the underlying tool, prepending its first declared command name to the arguments.
+     *
+     * @param args tool arguments.
+     * @return the tool exit code.
+     * @throws IllegalStateException if the tool class declares no command name through {@link Parameters}.
+     * @throws Exception if running the tool fails.
+     */
+    protected int runTool(String... args) throws Exception {
+        final Parameters parameters = toolClazz.getAnnotation( Parameters.class );
+        if (parameters == null || parameters.commandNames().length == 0) {
+            throw new IllegalStateException(
+                    format("Tool [%s] declares no command name through @Parameters", toolClazz.getName())
+            );
+        }
+        final String commandName = parameters.commandNames()[0];
+
+        final String[] enhancedArgs = new String[args.length + 1];
+        enhancedArgs[0] = commandName;
+        System.arraycopy( args, 0, enhancedArgs, 1, args.length );
+
+        return new ToolRunner().execute( enhancedArgs );
+    }
+
+    /**
+     * Runs the underlying tool with arguments given as one whitespace separated string.
+     * Leading, trailing and repeated whitespace is ignored, so no empty argument is ever
+     * produced; arguments that contain whitespace themselves must be passed through
+     * {@link #runTool(String...)} instead.
+     *
+     * @param args whitespace separated tool arguments.
+     * @return the tool exit code.
+     * @throws Exception if running the tool fails.
+     */
+    protected int runTool(String args) throws Exception {
+        final String trimmed = args.trim();
+        if (trimmed.isEmpty()) {
+            return runTool(new String[0]);
+        }
+        return runTool(trimmed.split("\\s+"));
+    }
+
+    /**
+     * Runs the underlying tool and verifies that the exit code is <code>0</code>.
+     *
+     * @param args tool arguments.
+     * @throws Exception if running the tool fails.
+     */
+    protected void runToolCheckExit0(String... args) throws Exception {
+        assertEquals(
+                format(
+                        "Unexpected exit code for tool [%s] invoked with %s",
+                        toolClazz.getSimpleName(),
+                        Arrays.asList(args)
+                ),
+                0,
+                runTool(args)
+        );
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/cli/src/test/java/org/apache/any23/cli/VocabPrinterTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/VocabPrinterTest.java b/cli/src/test/java/org/apache/any23/cli/VocabPrinterTest.java
new file mode 100644
index 0000000..1c841dc
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/VocabPrinterTest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.junit.Test;
+
+/**
+ * Test case for {@link VocabPrinter} CLI.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class VocabPrinterTest extends ToolTestBase {
+
+    public VocabPrinterTest() {
+        super(VocabPrinter.class);
+    }
+
+    /**
+     * Runs the tool without any argument and expects exit code <code>0</code>.
+     */
+    @Test
+    public void testRun() throws Exception {
+        final String[] noArguments = {};
+        runToolCheckExit0(noArguments);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java b/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
deleted file mode 100644
index 9a0410b..0000000
--- a/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.Parameters;
-import org.apache.any23.extractor.ExampleInputOutput;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.Extractor;
-import org.apache.any23.extractor.ExtractorRegistryImpl;
-import org.apache.any23.extractor.Extractor.BlindExtractor;
-import org.apache.any23.extractor.Extractor.ContentExtractor;
-import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.ExtractorRegistry;
-
-import java.io.IOException;
-import java.util.LinkedList;
-import java.util.List;
-
-/**
- * This class provides some command-line documentation
- * about available extractors and their usage.
- */
-@Parameters( commandNames = { "extractor" }, commandDescription= "Utility for obtaining documentation about metadata extractors.")
-public class ExtractorDocumentation implements Tool {
-
-    @Parameter( names = { "-l", "--list" }, description = "shows the names of all available extractors" )
-    private boolean showList;
-
-    @Parameter( names = { "-i", "--input" }, description = "shows example input for the given extractor" )
-    private boolean showInput;
-
-    @Parameter( names = { "-o", "--outut" }, description = "shows example output for the given extractor" )
-    private boolean showOutput;
-
-    @Parameter( names = { "-a", "--all" }, description = "shows a report about all available extractors" )
-    private boolean showAll;
-
-    @Parameter( arity = 1, description = "Extractor name" )
-    private List<String> extractor = new LinkedList<String>();
-
-    public void run() throws Exception {
-        if (showList) {
-            printExtractorList(ExtractorRegistryImpl.getInstance());
-        } else if (showInput) {
-            if (extractor.isEmpty()) {
-                throw new IllegalArgumentException("Required argument for -i: extractor name");
-            }
-
-            printExampleInput(extractor.get(0), ExtractorRegistryImpl.getInstance());
-        } else if (showOutput) {
-            if (extractor.isEmpty()) {
-                throw new IllegalArgumentException("Required argument for -o: extractor name");
-            }
-
-            printExampleOutput(extractor.get(0), ExtractorRegistryImpl.getInstance());
-        } else if (showAll) {
-            printReport(ExtractorRegistryImpl.getInstance());
-        }
-    }
-
-    /**
-     * Print an error message.
-     *
-     * @param msg the error message to be printed
-     */
-    public void printError(String msg) {
-        System.err.println(msg);
-    }
-
-    /**
-     * Prints the list of all the available extractors.
-     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
-     * containing all extractors
-     */
-    public void printExtractorList(ExtractorRegistry registry) {
-        for (ExtractorFactory factory : registry.getExtractorGroup()) {
-            System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
-        }
-    }
-
-    /**
-     * Prints an example of input for the provided extractor.
-     *
-     * @param extractorName the name of the extractor
-     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
-     * containing all extractors
-     * @throws IOException raised if no extractor is found with that name
-     */
-    public void printExampleInput(String extractorName, ExtractorRegistry registry) throws IOException {
-        ExtractorFactory<?> factory = getFactory(registry, extractorName);
-        ExampleInputOutput example = new ExampleInputOutput(factory);
-        String input = example.getExampleInput();
-        if (input == null) {
-            throw new IllegalArgumentException("Extractor " + extractorName + " provides no example input");
-        }
-        System.out.println(input);
-    }
-
-    /**
-     * Prints an output example for the given extractor.
-     *
-     * @param extractorName the extractor name
-     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
-     * containing all extractors
-     * @throws IOException raised if no extractor is found with that name
-     * @throws ExtractionException if there is an error duing extraction
-     */
-    public void printExampleOutput(String extractorName, ExtractorRegistry registry) throws IOException, ExtractionException {
-        ExtractorFactory<?> factory = getFactory(registry, extractorName);
-        ExampleInputOutput example = new ExampleInputOutput(factory);
-        String output = example.getExampleOutput();
-        if (output == null) {
-            throw new IllegalArgumentException("Extractor " + extractorName + " provides no example output");
-        }
-        System.out.println(output);
-    }
-
-    /**
-     * Prints a complete report on all the available extractors.
-     *
-     * @param registry the {@link org.apache.any23.extractor.ExtractorRegistry}
-     * containing all extractors
-     * @throws IOException raised if no extractor is found with that name
-     * @throws ExtractionException if there is an error duing extraction
-     */
-    public void printReport(ExtractorRegistry registry) throws IOException, ExtractionException {
-        for (String extractorName : registry.getAllNames()) {
-            ExtractorFactory<?> factory = registry.getFactory(extractorName);
-            ExampleInputOutput example = new ExampleInputOutput(factory);
-            System.out.println("Extractor: " + extractorName);
-            System.out.println("\ttype: " + getType(factory));
-            System.out.println();
-            final String exampleInput = example.getExampleInput();
-            if(exampleInput == null) {
-                System.out.println("(No Example Available)");
-            } else {
-                System.out.println("-------- Example Input  --------");
-                System.out.println(exampleInput);
-                System.out.println("-------- Example Output --------");
-                String output = example.getExampleOutput();
-                System.out.println(output == null || output.trim().length() == 0 ? "(No Output Generated)" : output);
-            }
-            System.out.println("================================");
-            System.out.println();
-        }
-    }
-
-    private ExtractorFactory<?> getFactory(ExtractorRegistry registry, String name) {
-        if (!registry.isRegisteredName(name)) {
-            throw new IllegalArgumentException("Unknown extractor name: " + name);
-        }
-        return registry.getFactory(name);
-    }
-
-    private String getType(ExtractorFactory<?> factory) {
-        Extractor<?> extractor = factory.createExtractor();
-        if (extractor instanceof BlindExtractor) {
-            return BlindExtractor.class.getSimpleName();
-        }
-        if (extractor instanceof TagSoupDOMExtractor) {
-            return TagSoupDOMExtractor.class.getSimpleName();
-        }
-        if (extractor instanceof ContentExtractor) {
-            return ContentExtractor.class.getSimpleName();
-        }
-        return "?";
-    }
-
-}