You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2017/01/13 22:25:58 UTC
[19/25] any23 git commit: ANY23-80 : Split out CLI into its own module
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
deleted file mode 100644
index 19c59bf..0000000
--- a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.IStringConverter;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.ParameterException;
-import com.beust.jcommander.Parameters;
-import org.apache.any23.extractor.html.TagSoupParser;
-import org.apache.any23.http.DefaultHTTPClient;
-import org.apache.any23.source.DocumentSource;
-import org.apache.any23.source.FileDocumentSource;
-import org.apache.any23.source.HTTPDocumentSource;
-import org.apache.any23.util.StreamUtils;
-
-import java.io.File;
-import java.io.InputStream;
-import java.net.URISyntaxException;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Command line <i>Microdata</i> parser, accepting both files and URLs and
- * returning a <i>JSON</i> representation of the extracted metadata as described at
- * <a href="http://www.w3.org/TR/microdata/#json">Microdata JSON Specification</a>.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-@Parameters( commandNames = { "microdata" }, commandDescription = "Commandline Tool for extracting Microdata from file/HTTP source.")
-public class MicrodataParser implements Tool {
-
- private static final Pattern HTTP_DOCUMENT_PATTERN = Pattern.compile("^https?://.*");
-
- private static final Pattern FILE_DOCUMENT_PATTERN = Pattern.compile("^file:(.*)$");
-
- @Parameter(
- arity = 1,
- description = "Input document URL, {http://path/to/resource.html|file:/path/to/localFile.html}",
- converter = MicrodataParserDocumentSourceConverter.class
- )
- private List<DocumentSource> document = new LinkedList<DocumentSource>();
-
- public void run() throws Exception {
- if (document.isEmpty()) {
- throw new IllegalArgumentException("No input document URL specified");
- }
- InputStream documentInputInputStream = null;
- try {
- final DocumentSource documentSource = document.get(0);
- documentInputInputStream = documentSource.openInputStream();
- final TagSoupParser tagSoupParser = new TagSoupParser(
- documentInputInputStream,
- documentSource.getDocumentIRI()
- );
- org.apache.any23.extractor.microdata.MicrodataParser.getMicrodataAsJSON(tagSoupParser.getDOM(), System.out);
- } finally {
- if (documentInputInputStream != null) StreamUtils.closeGracefully(documentInputInputStream);
- }
- }
-
- public static final class MicrodataParserDocumentSourceConverter implements IStringConverter<DocumentSource> {
-
- @Override
- public DocumentSource convert( String value ) {
- final Matcher httpMatcher = HTTP_DOCUMENT_PATTERN.matcher(value);
- if (httpMatcher.find()) {
- try {
- return new HTTPDocumentSource(DefaultHTTPClient.createInitializedHTTPClient(), value);
- } catch ( URISyntaxException e ) {
- throw new ParameterException("Invalid source IRI: '" + value + "'");
- }
- }
- final Matcher fileMatcher = FILE_DOCUMENT_PATTERN.matcher(value);
- if (fileMatcher.find()) {
- return new FileDocumentSource( new File( fileMatcher.group(1) ) );
- }
- throw new ParameterException("Invalid source protocol: '" + value + "'");
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/MimeDetector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/MimeDetector.java b/core/src/main/java/org/apache/any23/cli/MimeDetector.java
deleted file mode 100644
index c9072cb..0000000
--- a/core/src/main/java/org/apache/any23/cli/MimeDetector.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.IStringConverter;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.Parameters;
-import org.apache.any23.http.DefaultHTTPClient;
-import org.apache.any23.http.DefaultHTTPClientConfiguration;
-import org.apache.any23.http.HTTPClient;
-import org.apache.any23.mime.MIMEType;
-import org.apache.any23.mime.MIMETypeDetector;
-import org.apache.any23.mime.TikaMIMETypeDetector;
-import org.apache.any23.source.DocumentSource;
-import org.apache.any23.source.FileDocumentSource;
-import org.apache.any23.source.HTTPDocumentSource;
-import org.apache.any23.source.StringDocumentSource;
-
-import java.io.File;
-import java.net.URISyntaxException;
-import java.util.LinkedList;
-import java.util.List;
-
-/**
- * Commandline tool to detect <b>MIME Type</b>s from
- * file, HTTP and direct input sources.
- * The implementation of this tool is based on {@link org.apache.any23.mime.TikaMIMETypeDetector}.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-@Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.")
-public class MimeDetector implements Tool{
-
- public static final String FILE_DOCUMENT_PREFIX = "file://";
-
- public static final String INLINE_DOCUMENT_PREFIX = "inline://";
-
- public static final String URL_DOCUMENT_RE = "^https?://.*";
-
- @Parameter(
- arity = 1,
- description = "Input document URL, {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}",
- converter = MimeDetectorDocumentSourceConverter.class
- )
- private List<DocumentSource> document = new LinkedList<DocumentSource>();
-
- public void run() throws Exception {
- if (document.isEmpty()) {
- throw new IllegalArgumentException("No input document URL specified");
- }
-
- final DocumentSource documentSource = document.get(0);
- final MIMETypeDetector detector = new TikaMIMETypeDetector();
- final MIMEType mimeType = detector.guessMIMEType(
- documentSource.getDocumentIRI(),
- documentSource.openInputStream(),
- MIMEType.parse(documentSource.getContentType())
- );
- System.out.println(mimeType);
- }
-
- public static final class MimeDetectorDocumentSourceConverter implements IStringConverter<DocumentSource> {
-
- @Override
- public DocumentSource convert( String document ) {
- if (document.startsWith(FILE_DOCUMENT_PREFIX)) {
- return new FileDocumentSource( new File( document.substring(FILE_DOCUMENT_PREFIX.length()) ) );
- }
- if (document.startsWith(INLINE_DOCUMENT_PREFIX)) {
- return new StringDocumentSource( document.substring(INLINE_DOCUMENT_PREFIX.length()), "" );
- }
- if (document.matches(URL_DOCUMENT_RE)) {
- final HTTPClient client = new DefaultHTTPClient();
- client.init( DefaultHTTPClientConfiguration.singleton() );
- try {
- return new HTTPDocumentSource(client, document);
- } catch ( URISyntaxException e ) {
- throw new IllegalArgumentException("Invalid source IRI: '" + document + "'");
- }
- }
- throw new IllegalArgumentException("Unsupported protocol for document " + document);
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
deleted file mode 100644
index a747b49..0000000
--- a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.Parameters;
-import com.beust.jcommander.converters.FileConverter;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.mime.MIMEType;
-import org.apache.any23.plugin.Any23PluginManager;
-import org.apache.any23.plugin.Author;
-import java.io.File;
-import java.io.PrintStream;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-
-/**
- * Commandline utility to verify the <b>Any23</b> plugins
- * and extract basic information.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-@Parameters(commandNames = { "verify" }, commandDescription = "Utility for plugin management verification.")
-public class PluginVerifier implements Tool {
-
- private Any23PluginManager pluginManager = Any23PluginManager.getInstance();
-
- @Parameter(
- description = "plugins-dir",
- converter = FileConverter.class
- )
- private List<File> pluginsDirs = new LinkedList<File>();
-
- public void run() throws Exception {
- if (pluginsDirs.isEmpty()) {
- throw new IllegalArgumentException("No plugin directory specified.");
- }
-
- final File pluginsDir = pluginsDirs.get(0);
- if (!pluginsDir.isDirectory()) {
- throw new IllegalArgumentException("<plugins-dir> must be a valid dir.");
- }
-
- pluginManager.loadJARDir(pluginsDir);
-
- final Iterator<ExtractorFactory> plugins = pluginManager.getExtractors();
-
- while (plugins.hasNext()) {
- printPluginData(plugins.next(), System.out);
- System.out.println("------------------------------------------------------------------------");
- }
- }
-
- private String getMimeTypesStr(Collection<MIMEType> mimeTypes) {
- final StringBuilder sb = new StringBuilder();
- for (MIMEType mt : mimeTypes) {
- sb.append(mt).append(' ');
- }
- return sb.toString();
- }
-
- private void printPluginData(ExtractorFactory extractorFactory, PrintStream ps) {
- final Author authorAnnotation = extractorFactory.getClass().getAnnotation(Author.class);
- ps.printf("Plugin author : %s\n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
- ps.printf("Plugin factory : %s\n", extractorFactory.getClass());
- ps.printf("Plugin mime-types: %s\n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() ));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/Rover.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/Rover.java b/core/src/main/java/org/apache/any23/cli/Rover.java
deleted file mode 100644
index 26a8663..0000000
--- a/core/src/main/java/org/apache/any23/cli/Rover.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.IStringConverter;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.ParameterException;
-import com.beust.jcommander.Parameters;
-import com.beust.jcommander.converters.FileConverter;
-import org.apache.any23.Any23;
-import org.apache.any23.configuration.Configuration;
-import org.apache.any23.configuration.DefaultConfiguration;
-import org.apache.any23.extractor.ExtractionParameters;
-import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
-import org.apache.any23.filter.IgnoreAccidentalRDFa;
-import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
-import org.apache.any23.source.DocumentSource;
-import org.apache.any23.writer.BenchmarkTripleHandler;
-import org.apache.any23.writer.LoggingTripleHandler;
-import org.apache.any23.writer.ReportingTripleHandler;
-import org.apache.any23.writer.TripleHandler;
-import org.apache.any23.writer.TripleHandlerException;
-import org.apache.any23.writer.WriterFactoryRegistry;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.PrintStream;
-import java.io.PrintWriter;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.LinkedList;
-import java.util.List;
-
-import static java.lang.String.format;
-
-/**
- * A default rover implementation. Goes and fetches a URL using a hint
- * as to what format is required, then tries to convert it to RDF.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- * @author Richard Cyganiak (richard@cyganiak.de)
- * @author Gabriele Renzi
- */
-@Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.")
-public class Rover implements Tool {
-
- private static final List<String> FORMATS = WriterFactoryRegistry.getInstance().getIdentifiers();
-
- private static final int DEFAULT_FORMAT_INDEX = 0;
-
- private static final Logger logger = LoggerFactory.getLogger(Rover.class);
-
- @Parameter(
- names = { "-o", "--output" },
- description = "Specify Output file (defaults to standard output)",
- converter = PrintStreamConverter.class
- )
- private PrintStream outputStream = System.out;
-
- @Parameter(description = "input IRIs {<url>|<file>}+", converter = ArgumentToIRIConverter.class)
- protected List<String> inputIRIs = new LinkedList<String>();
-
- @Parameter(names = { "-e", "--extractors" }, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle")
- private List<String> extractors = new LinkedList<String>();
-
- @Parameter(names = { "-f", "--format" }, description = "the output format")
- private String format = FORMATS.get(DEFAULT_FORMAT_INDEX);
-
- @Parameter(
- names = { "-l", "--log" },
- description = "Produce log within a file.",
- converter = FileConverter.class
- )
- private File logFile = null;
-
- @Parameter(names = { "-s", "--stats" }, description = "Print out extraction statistics.")
- private boolean statistics;
-
- @Parameter(names = { "-t", "--notrivial" }, description = "Filter trivial statements (e.g. CSS related ones).")
- private boolean noTrivial;
-
- @Parameter(names = { "-p", "--pedantic" }, description = "Validate and fixes HTML content detecting commons issues.")
- private boolean pedantic;
-
- @Parameter(names = { "-n", "--nesting" }, description = "Disable production of nesting triples.")
- private boolean nestingDisabled;
-
- @Parameter(names = { "-d", "--defaultns" }, description = "Override the default namespace used to produce statements.")
- private String defaultns;
-
- // non parameters
-
- private TripleHandler tripleHandler;
-
- private ReportingTripleHandler reportingTripleHandler;
-
- private BenchmarkTripleHandler benchmarkTripleHandler;
-
- private Any23 any23;
-
- private ExtractionParameters extractionParameters;
-
- protected void configure() {
- try {
- tripleHandler = WriterFactoryRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream);
- } catch (Exception e) {
- throw new NullPointerException(
- format("Invalid output format '%s', admitted values: %s",
- format,
- FORMATS
- )
- );
- }
-
- if (logFile != null) {
- try {
- tripleHandler = new LoggingTripleHandler(tripleHandler, new PrintWriter(logFile));
- } catch (FileNotFoundException fnfe) {
- throw new IllegalArgumentException( format("Can not write to log file [%s]", logFile), fnfe );
- }
- }
-
- if (statistics) {
- benchmarkTripleHandler = new BenchmarkTripleHandler(tripleHandler);
- tripleHandler = benchmarkTripleHandler;
- }
-
- if (noTrivial) {
- tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(tripleHandler),
- true // suppress stylesheet triples.
- );
- }
-
- reportingTripleHandler = new ReportingTripleHandler(tripleHandler);
-
- final Configuration configuration = DefaultConfiguration.singleton();
- extractionParameters =
- pedantic
- ?
- new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)
- :
- new ExtractionParameters(configuration, ValidationMode.None , nestingDisabled);
- if (defaultns != null) {
- extractionParameters.setProperty(ExtractionParameters.EXTRACTION_CONTEXT_IRI_PROPERTY,
- defaultns);
- }
-
- any23 = (extractors.isEmpty()) ? new Any23()
- : new Any23(extractors.toArray(new String[extractors.size()]));
- any23.setHTTPUserAgent(Any23.DEFAULT_HTTP_CLIENT_USER_AGENT + "/" + Any23.VERSION);
- }
-
- protected String printReports() {
- final StringBuilder sb = new StringBuilder();
- if (benchmarkTripleHandler != null) sb.append( benchmarkTripleHandler.report() ).append('\n');
- if (reportingTripleHandler != null) sb.append( reportingTripleHandler.printReport() ).append('\n');
- return sb.toString();
- }
-
- protected void performExtraction(DocumentSource documentSource) throws Exception {
- if (!any23.extract(extractionParameters, documentSource, tripleHandler).hasMatchingExtractors()) {
- throw new IllegalStateException(format("No suitable extractors found for source %s", documentSource));
- }
- }
-
- protected void close() {
- if (tripleHandler != null) {
- try {
- tripleHandler.close();
- } catch (TripleHandlerException the) {
- throw new RuntimeException("Error while closing TripleHandler", the);
- }
- }
-
- if (outputStream != null && outputStream != System.out) { // TODO: low - find better solution to avoid closing system out.
- outputStream.close();
- }
- }
-
- public void run() throws Exception {
- if (inputIRIs.isEmpty()) {
- throw new IllegalArgumentException("Expected at least 1 argument.");
- }
-
- configure();
-
- // perform conversions
-
- try {
- final long start = System.currentTimeMillis();
- for (String inputIRI : inputIRIs) {
- DocumentSource source = any23.createDocumentSource(inputIRI);
-
- performExtraction( source );
- }
- final long elapsed = System.currentTimeMillis() - start;
-
- if (benchmarkTripleHandler != null) {
- System.err.println(benchmarkTripleHandler.report());
- }
-
- logger.info("Extractors used: " + reportingTripleHandler.getExtractorNames());
- logger.info(reportingTripleHandler.getTotalTriples() + " triples, " + elapsed + "ms");
- } finally {
- close();
- }
- }
-
- public static final class ArgumentToIRIConverter implements IStringConverter<String> {
-
- @Override
- public String convert(String uri) {
- uri = uri.trim();
- if (uri.toLowerCase().startsWith("http:") || uri.toLowerCase().startsWith("https:")) {
- try {
- return new URL(uri).toString();
- } catch (MalformedURLException murle) {
- throw new ParameterException(format("Invalid IRI: '%s': %s", uri, murle.getMessage()));
- }
- }
-
- final File f = new File(uri);
- if (!f.exists()) {
- throw new ParameterException(format("No such file: [%s]", f.getAbsolutePath()));
- }
- if (f.isDirectory()) {
- throw new ParameterException(format("Found a directory: [%s]", f.getAbsolutePath()));
- }
- return f.toURI().toString();
- }
-
- }
-
- public static final class PrintStreamConverter implements IStringConverter<PrintStream> {
-
- @Override
- public PrintStream convert( String value ) {
- final File file = new File(value);
- try {
- return new PrintStream(file);
- } catch (FileNotFoundException fnfe) {
- throw new ParameterException(format("Cannot open file '%s': %s", file, fnfe.getMessage()));
- }
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/ToolRunner.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/ToolRunner.java b/core/src/main/java/org/apache/any23/cli/ToolRunner.java
deleted file mode 100644
index 90daeb3..0000000
--- a/core/src/main/java/org/apache/any23/cli/ToolRunner.java
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.JCommander;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.converters.FileConverter;
-import org.apache.any23.Any23;
-import org.apache.any23.plugin.Any23PluginManager;
-import org.apache.any23.util.LogUtils;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Properties;
-
-import static java.lang.System.currentTimeMillis;
-import static java.lang.System.exit;
-
-/**
- * This class is the main class responsible for providing a uniform command-line
- * access point to all the other tools like {@link Rover}.
- *
- * @see ExtractorDocumentation
- * @see Rover
- */
-public final class ToolRunner {
-
- public static final File DEFAULT_PLUGIN_DIR = new File(new File(System.getProperty("user.home")), ".any23/plugins");
-
- private static final PrintStream infoStream = System.err;
-
- @Parameter( names = { "-h", "--help" }, description = "Display help information." )
- private boolean printHelp;
-
- @Parameter( names = { "-v", "--version" }, description = "Display version information." )
- private boolean showVersion;
-
- @Parameter( names = { "-X", "--verbose" }, description = "Produce execution verbose output." )
- private boolean verbose;
-
- @Parameter(
- names = { "--plugins-dir" },
- description = "The Any23 plugins directory.",
- converter = FileConverter.class
- )
- private File pluginsDir = DEFAULT_PLUGIN_DIR;
-
- public static void main( String[] args ) throws Exception {
- exit( new ToolRunner().execute( args ) );
- }
-
- public int execute(String...args) throws Exception {
- JCommander commander = new JCommander(this);
- commander.setProgramName(System.getProperty("app.name"));
-
- // TODO (low) : this dirty solution has been introduced because it is not possible to
- // parse arguments ( commander.parse() ) twice.
- final File pluginsDirOption;
- try {
- pluginsDirOption = parsePluginDirOption(args);
- } catch (Exception e) {
- System.err.println(e.getMessage());
- return 1;
- }
- if(pluginsDirOption != null) {
- pluginsDir = pluginsDirOption;
- }
-
- // add all plugins first
- final Iterator<Tool> tools = getToolsInClasspath();
- while (tools.hasNext()) {
- Tool tool = tools.next();
- commander.addCommand(tool);
- }
-
- commander.parse(args);
-
- Map<String, JCommander> commands = commander.getCommands();
- String parsedCommand = commander.getParsedCommand();
-
- if (printHelp) {
- commander.usage();
- return 0;
- }
-
- if (showVersion) {
- printVersionInfo();
- return 0;
- }
-
- if(parsedCommand == null) {
- infoStream.println("A command must be specified.");
- commander.usage();
- return 1;
- }
-
- if (verbose) {
- LogUtils.setVerboseLogging();
- } else {
- LogUtils.setDefaultLogging();
- }
-
- long start = currentTimeMillis();
- int exit = 0;
-
- Throwable error = null;
-
- // execute the parsed command
- infoStream.println();
- infoStream.println( "------------------------------------------------------------------------" );
- infoStream.printf( "Apache Any23 :: %s%n", parsedCommand );
- infoStream.println( "------------------------------------------------------------------------" );
- infoStream.println();
-
- try {
- Tool.class.cast( commands.get( parsedCommand ).getObjects().get( 0 ) ).run();
- } catch (Throwable t) {
- exit = 1;
- error = t;
- } finally {
- infoStream.println();
- infoStream.println( "------------------------------------------------------------------------" );
- infoStream.printf( "Apache Any23 %s%n", ( exit != 0 ) ? "FAILURE" : "SUCCESS" );
-
- if (exit != 0) {
- infoStream.println();
-
- if (verbose) {
- System.err.println( "Execution terminated with errors:" );
- error.printStackTrace(infoStream);
- } else {
- infoStream.printf( "Execution terminated with errors: %s%n", error.getMessage() );
- }
-
- infoStream.println();
- }
-
- infoStream.printf( "Total time: %ss%n", ( ( currentTimeMillis() - start ) / 1000 ) );
- infoStream.printf( "Finished at: %s%n", new Date() );
-
- final Runtime runtime = Runtime.getRuntime();
- final int megaUnit = 1024 * 1024;
- infoStream.printf( "Final Memory: %sM/%sM%n", ( runtime.totalMemory() - runtime.freeMemory() ) / megaUnit,
- runtime.totalMemory() / megaUnit );
-
- infoStream.println( "------------------------------------------------------------------------" );
- }
-
- return exit;
- }
-
- Iterator<Tool> getToolsInClasspath() throws IOException {
- final Any23PluginManager pluginManager = Any23PluginManager.getInstance();
- if (pluginsDir.exists() && pluginsDir.isDirectory()) {
- pluginManager.loadJARDir(pluginsDir);
- }
- return pluginManager.getTools();
- }
-
- private static void printVersionInfo() {
- Properties properties = new Properties();
- InputStream input = ToolRunner.class.getClassLoader().getResourceAsStream( "META-INF/maven/org.apache.any23/any23-core/pom.properties" );
-
- if ( input != null ) {
- try {
- properties.load( input );
- } catch ( IOException e ) {
- // ignore, just don't load the properties
- } finally {
- try {
- input.close();
- } catch (IOException e) {
- // close quietly
- }
- }
- }
-
- infoStream.printf( "Apache Any23 %s%n", Any23.VERSION );
- infoStream.printf( "Java version: %s, vendor: %s%n",
- System.getProperty( "java.version" ),
- System.getProperty( "java.vendor" ) );
- infoStream.printf( "Java home: %s%n", System.getProperty( "java.home" ) );
- infoStream.printf( "Default locale: %s_%s, platform encoding: %s%n",
- System.getProperty( "user.language" ),
- System.getProperty( "user.country" ),
- System.getProperty( "sun.jnu.encoding" ) );
- infoStream.printf( "OS name: \"%s\", version: \"%s\", arch: \"%s\", family: \"%s\"%n",
- System.getProperty( "os.name" ),
- System.getProperty( "os.version" ),
- System.getProperty( "os.arch" ),
- getOsFamily() );
- }
-
- private static final String getOsFamily() {
- String osName = System.getProperty( "os.name" ).toLowerCase();
- String pathSep = System.getProperty( "path.separator" );
-
- if (osName.contains("windows")) {
- return "windows";
- } else if (osName.contains("os/2")) {
- return "os/2";
- } else if (osName.contains("z/os") || osName.contains("os/390")) {
- return "z/os";
- } else if (osName.contains("os/400")) {
- return "os/400";
- } else if (pathSep.equals( ";" )) {
- return "dos";
- } else if (osName.contains("mac")) {
- if (osName.endsWith("x")) {
- return "mac"; // MACOSX
- }
- return "unix";
- } else if (osName.contains("nonstop_kernel")) {
- return "tandem";
- } else if (osName.contains("openvms")) {
- return "openvms";
- } else if (pathSep.equals(":")) {
- return "unix";
- }
-
- return "undefined";
- }
-
- private static File parsePluginDirOption(String[] args) {
- int optionIndex = -1;
- for(int i = 0; i < args.length; i++) {
- if("--plugins-dir".equals(args[i])) {
- optionIndex = i;
- }
- }
- if(optionIndex == -1) return null;
-
- if(optionIndex == args.length - 1) {
- throw new IllegalArgumentException("Missing argument for --plugins-dir option.");
- }
- final File pluginsDir = new File( args[optionIndex + 1] );
- if( ! pluginsDir.isDirectory() ) {
- throw new IllegalArgumentException("Expected a directory for --plugins-dir option value.");
- }
- return pluginsDir;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
deleted file mode 100644
index 7fde887..0000000
--- a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.apache.any23.vocab.RDFSchemaUtils;
-import org.eclipse.rdf4j.rio.RDFFormat;
-import org.eclipse.rdf4j.rio.RDFWriterRegistry;
-import org.eclipse.rdf4j.rio.Rio;
-
-import com.beust.jcommander.IStringConverter;
-import com.beust.jcommander.Parameter;
-import com.beust.jcommander.Parameters;
-
-/**
- * Prints out the vocabulary <i>RDFSchema</i> as <i>NQuads</i>.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-@Parameters(commandNames = { "vocab" }, commandDescription = "Prints out the RDF Schema of the vocabularies used by Any23.")
-public class VocabPrinter implements Tool {
-
- @Parameter(names = { "-f", "--format" }, description = "Vocabulary output format", converter = RDFFormatConverter.class)
- private RDFFormat format = RDFFormat.NQUADS;
-
- public void run() throws Exception {
- RDFSchemaUtils.serializeVocabularies(format, System.out);
- }
-
- public static final class RDFFormatConverter implements
- IStringConverter<RDFFormat> {
-
- @Override
- public RDFFormat convert(String value) {
- return RDFWriterRegistry.getInstance().getFileFormatForMIMEType(value).orElseThrow(Rio.unsupportedFormat(value));
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/main/java/org/apache/any23/cli/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/package-info.java b/core/src/main/java/org/apache/any23/cli/package-info.java
deleted file mode 100644
index 40ae928..0000000
--- a/core/src/main/java/org/apache/any23/cli/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This package contains some command-line utilities which allow users
- * to use the main <i>Any23</i> features via <i>commandline</i> shell.
- */
-package org.apache.any23.cli;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java b/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
deleted file mode 100644
index 98616ba..0000000
--- a/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.junit.Test;
-
-/**
- * Test case for {@link ExtractorDocumentation} CLI.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class ExtractorDocumentationTest extends ToolTestBase {
-
- private static final String TARGET_EXTRACTOR = "html-microdata";
-
- public ExtractorDocumentationTest() {
- super(ExtractorDocumentation.class);
- }
-
- @Test
- public void testList() throws Exception {
- runToolCheckExit0("--list");
- }
-
- @Test
- public void testAll() throws Exception {
- runToolCheckExit0("--all");
- }
-
- //@Ignore("no available example")
- @Test
- public void testExampleInput() throws Exception {
- runToolCheckExit0("-i", TARGET_EXTRACTOR);
- }
-
- //@Ignore("no available example")
- @Test
- public void testExampleOutput() throws Exception {
- runToolCheckExit0("-o", TARGET_EXTRACTOR);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java
deleted file mode 100644
index a80e729..0000000
--- a/core/src/test/java/org/apache/any23/cli/MicrodataParserTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.junit.Ignore;
-import org.junit.Test;
-
-/**
- * Test case for {@link MicrodataParser} CLI.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class MicrodataParserTest extends ToolTestBase {
-
- public MicrodataParserTest() {
- super(MicrodataParser.class);
- }
-
- @Test
- public void testRunOnFile() throws Exception {
- runToolCheckExit0("file:"+copyResourceToTempFile("/microdata/microdata-nested.html").getAbsolutePath());
- }
-
- @Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
- @Test
- public void testRunOnHTTPResource() throws Exception {
- runToolCheckExit0("http://www.imdb.com/title/tt1375666/");
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java b/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
deleted file mode 100644
index 3894d32..0000000
--- a/core/src/test/java/org/apache/any23/cli/MimeDetectorTest.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.junit.Test;
-
-/**
- * Test case for {@link MimeDetector} CLI.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class MimeDetectorTest extends ToolTestBase {
-
- public MimeDetectorTest() {
- super(MimeDetector.class);
- }
-
- @Test
- public void testDetectURL() throws Exception {
- assumeOnlineAllowed();
- runToolCheckExit0("http://twitter.com#micmos");
- }
-
- @Test
- public void testDetectFile() throws Exception {
- assumeOnlineAllowed();
- runToolCheckExit0("file://"+copyResourceToTempFile("/application/trix/test1.trx").getAbsolutePath());
- }
-
- @Test
- public void testDetectInline() throws Exception {
- assumeOnlineAllowed();
- runToolCheckExit0( new String[] {"inline://<http://s> <http://p> <http://o> ."} );
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java b/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java
deleted file mode 100644
index bdee9ae..0000000
--- a/core/src/test/java/org/apache/any23/cli/PluginVerifierTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.junit.Test;
-
-/**
- * Test case for {@link PluginVerifier} CLI.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class PluginVerifierTest extends ToolTestBase {
-
- public PluginVerifierTest() {
- super(PluginVerifier.class);
- }
-
- @Test
- public void testRun() throws Exception {
- runToolCheckExit0(".");
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/RoverTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/RoverTest.java b/core/src/test/java/org/apache/any23/cli/RoverTest.java
deleted file mode 100644
index 893220a..0000000
--- a/core/src/test/java/org/apache/any23/cli/RoverTest.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.apache.any23.rdf.RDFUtils;
-import org.apache.any23.util.FileUtils;
-import org.apache.any23.util.StringUtils;
-import org.apache.any23.util.URLUtils;
-import org.junit.Assert;
-import org.junit.Assume;
-import org.junit.Ignore;
-import org.junit.Test;
-import org.eclipse.rdf4j.model.Statement;
-import org.eclipse.rdf4j.rio.RDFFormat;
-
-import java.io.File;
-import java.util.Arrays;
-
-/**
- * Test case for {@link Rover}.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-@Ignore("Twitter microdata not parsing correctly right now")
-public class RoverTest extends ToolTestBase {
-
- private static final String[] TARGET_FILES = {
- "/microdata/microdata-nested.html",
- "/org/apache/any23/extractor/csv/test-semicolon.csv"
- };
-
- private static final String[] TARGET_URLS = {
- "http://twitter.com/micmos",
- "http://twitter.com/dpalmisano"
- };
-
- public RoverTest() {
- super(Rover.class);
- }
-
- @Test
- public void testRunMultiFiles() throws Exception {
-
- String[] copiedTargets = new String[TARGET_FILES.length];
- for(int i = 0; i < TARGET_FILES.length; i++)
- {
- File tempFile = copyResourceToTempFile(TARGET_FILES[i]);
-
- copiedTargets[i] = tempFile.getAbsolutePath();
- }
-
- runWithMultiSourcesAndVerify(copiedTargets, 0);
- }
-
- @Test
- public void testRunWithDefaultNS() throws Exception {
- final String DEFAULT_GRAPH = "http://test/default/ns";
- final File outFile = File.createTempFile("rover-test", "out", tempDirectory);
- final int exitCode = runTool(
- String.format(
- "-o %s -f nquads -p -n %s -d %s",
- outFile.getAbsolutePath(),
- copyResourceToTempFile("/cli/rover-test1.nq").getAbsolutePath(),
- DEFAULT_GRAPH
- )
- );
-
- Assert.assertEquals("Unexpected exit code.", 0, exitCode);
- Assert.assertTrue(outFile.exists());
- final String fileContent = FileUtils.readFileContent(outFile);
- final String[] lines = fileContent.split("\\n");
- int graphCounter = 0;
- for(String line : lines) {
- if(line.contains(DEFAULT_GRAPH)) {
- graphCounter++;
- }
- }
- Assert.assertEquals(0, graphCounter);
- }
-
- /* BEGIN: online tests. */
-
- @Test
- public void testRunMultiURLs() throws Exception {
- // Assuming first accessibility to remote resources.
- assumeOnlineAllowed();
- for(String targetURL : TARGET_URLS) {
- Assume.assumeTrue( URLUtils.isOnline(targetURL) );
- }
-
- runWithMultiSourcesAndVerify(TARGET_URLS, 0);
- }
-
- private void runWithMultiSourcesAndVerify(String[] targets, int expectedExit) throws Exception {
- final File outFile = File.createTempFile("rover-test", "out", tempDirectory);
- final File logFile = File.createTempFile("rover-test", "log", tempDirectory);
-
- final int exitCode = runTool(
- String.format(
- "-o %s -f nquads -l %s -p -n %s",
- outFile.getAbsolutePath(),
- logFile.getAbsolutePath(),
- StringUtils.join(" ", targets)
- )
- );
- Assert.assertEquals("Unexpected exit code.", expectedExit, exitCode);
-
- Assert.assertTrue(outFile.exists());
- Assert.assertTrue(logFile.exists());
-
- final String logFileContent = FileUtils.readFileContent(logFile);
- Assert.assertEquals(
- "Unexpected number of log lines.",
- targets.length + 1, // Header line.
- StringUtils.countNL(logFileContent)
- );
-
- final String outNQuads = FileUtils.readFileContent(outFile);
- final Statement[] statements = RDFUtils.parseRDF(RDFFormat.NQUADS, outNQuads);
- System.out.println(Arrays.toString(statements));
- Assert.assertTrue("Unexpected number of statements.", statements.length > 9);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java b/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
deleted file mode 100644
index 881a782..0000000
--- a/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import junit.framework.Assert;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
-
-import static org.junit.Assert.assertTrue;
-
-/**
- * Test case for {@link ToolRunner}.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class ToolRunnerTest {
-
- private final Set<Class<? extends Tool>> coreTools = new HashSet<Class<? extends Tool>>(){{
- add(ExtractorDocumentation.class);
- add(MicrodataParser.class);
- add(MimeDetector.class);
- add(PluginVerifier.class);
- add(Rover.class);
- add(VocabPrinter.class);
- }};
-
- @Test
- public void testGetToolsInClasspath() throws IOException {
- Iterator<Tool> tools = new ToolRunner().getToolsInClasspath();
- assertTrue("No core tools have been detected", tools.hasNext());
- while (tools.hasNext()) {
- assertTrue("Some core tools have not been detected.", coreTools.contains(tools.next().getClass()));
- }
- }
-
- @Test
- public void testGetVersion() throws Exception {
- Assert.assertEquals(0, new ToolRunner().execute("-v") );
- }
-
- @Test
- public void testGetHelp() throws Exception {
- Assert.assertEquals(0, new ToolRunner().execute("-h") );
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/ToolTestBase.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/ToolTestBase.java b/core/src/test/java/org/apache/any23/cli/ToolTestBase.java
deleted file mode 100644
index fef49cd..0000000
--- a/core/src/test/java/org/apache/any23/cli/ToolTestBase.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import com.beust.jcommander.Parameters;
-import org.apache.any23.Any23OnlineTestBase;
-
-import java.util.Arrays;
-
-import static java.lang.String.format;
-import static org.junit.Assert.assertEquals;
-
-/**
- * Base class for <i>CLI</i> related tests.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-// TODO: improve support for Tool testing, intercept i/o streams.
-public abstract class ToolTestBase extends Any23OnlineTestBase {
-
- public static final String TOOL_RUN_METHOD = "run";
-
- private final Class<? extends Tool> toolClazz;
-
- protected ToolTestBase(Class<? extends Tool> tool) {
- if (tool == null) throw new NullPointerException();
- toolClazz = tool;
- }
-
- /**
- * Runs the underlying tool.
- *
- * @param args tool arguments.
- * @return the tool exit code.
- * @throws Exception
- */
- protected int runTool(String... args) throws Exception {
- final String commandName = toolClazz.getAnnotation( Parameters.class ).commandNames()[0];
-
- final String[] enhancedArgs = new String[args.length + 1];
- enhancedArgs[0] = commandName;
- System.arraycopy( args, 0, enhancedArgs, 1, args.length );
-
- return new ToolRunner().execute( enhancedArgs );
- }
-
- /**
- * Runs the underlying tool.
- *
- * @param args args tool arguments.
- * @return the tool exit code.
- * @throws Exception
- */
- protected int runTool(String args) throws Exception {
- return runTool(args.split(" "));
- }
-
- /**
- * Runs the underlying tool and verify the exit code to <code>0</code>.
- *
- * @param args tool arguments.
- * @throws Exception
- */
- protected void runToolCheckExit0(String... args) throws Exception {
- assertEquals(
- format(
- "Unexpected exit code for tool [%s] invoked with %s",
- toolClazz.getSimpleName(),
- Arrays.asList(args)
- ),
- 0,
- runTool(args)
- );
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java b/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java
deleted file mode 100644
index 1c841dc..0000000
--- a/core/src/test/java/org/apache/any23/cli/VocabPrinterTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.cli;
-
-import org.junit.Test;
-
-/**
- * Test case for {@link VocabPrinter} CLI.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class VocabPrinterTest extends ToolTestBase {
-
- public VocabPrinterTest() {
- super(VocabPrinter.class);
- }
-
- @Test
- public void testRun() throws Exception {
- runToolCheckExit0();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/plugins/basic-crawler/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml
index bffd7e2..d5a5b05 100644
--- a/plugins/basic-crawler/pom.xml
+++ b/plugins/basic-crawler/pom.xml
@@ -27,7 +27,6 @@
<groupId>org.apache.any23.plugins</groupId>
<artifactId>apache-any23-basic-crawler</artifactId>
- <version>1.0.6-SNAPSHOT</version>
<name>Apache Any23 :: Plugins :: Basic Crawler</name>
<description>Any23 plugin for crawling sites.</description>
@@ -56,6 +55,21 @@
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.any23</groupId>
+ <artifactId>apache-any23-cli</artifactId>
+ <version>2.0-SNAPSHOT</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.any23</groupId>
+ <artifactId>apache-any23-cli</artifactId>
+ <version>2.0-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
<!-- Crawler4j -->
<dependency>
<groupId>edu.uci.ics</groupId>
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/plugins/html-scraper/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/pom.xml b/plugins/html-scraper/pom.xml
index e04651a..359b08a 100644
--- a/plugins/html-scraper/pom.xml
+++ b/plugins/html-scraper/pom.xml
@@ -27,7 +27,6 @@
<groupId>org.apache.any23.plugins</groupId>
<artifactId>apache-any23-html-scraper</artifactId>
- <version>1.0.7-SNAPSHOT</version>
<name>Apache Any23 :: Plugins :: HTML Scraper</name>
<description>Any23 plugin for scraping HTML code.</description>
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/plugins/office-scraper/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/office-scraper/pom.xml b/plugins/office-scraper/pom.xml
index db5e3ef..40476a3 100644
--- a/plugins/office-scraper/pom.xml
+++ b/plugins/office-scraper/pom.xml
@@ -27,7 +27,6 @@
<groupId>org.apache.any23.plugins</groupId>
<artifactId>apache-any23-office-scraper</artifactId>
- <version>1.0.6-SNAPSHOT</version>
<name>Apache Any23 :: Plugins :: Office Scraper</name>
<description>Any23 plugin for scraping metadata from MS Office related file formats.</description>
http://git-wip-us.apache.org/repos/asf/any23/blob/242b130b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0a8c69b..db0448b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -203,6 +203,7 @@
<module>mime</module>
<module>encoding</module>
<module>core</module>
+ <module>cli</module>
<module>plugins/basic-crawler</module>
<module>plugins/html-scraper</module>
<module>plugins/office-scraper</module>