You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/10/23 19:20:18 UTC
[2/2] any23 git commit: ANY23-396 Overhaul WriterFactory API
ANY23-396 Overhaul WriterFactory API
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/692c583f
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/692c583f
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/692c583f
Branch: refs/heads/master
Commit: 692c583f848c5b7ae5a7940c857bfb0a9542c0d5
Parents: 12640a9
Author: Hans <fi...@gmail.com>
Authored: Fri Sep 14 10:29:33 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Tue Oct 23 14:14:26 2018 -0500
----------------------------------------------------------------------
api/pom.xml | 6 +
.../org/apache/any23/configuration/Setting.java | 269 ++++++++++++++++++
.../apache/any23/configuration/Settings.java | 156 +++++++++++
.../any23/writer/DecoratingWriterFactory.java | 45 +++
.../org/apache/any23/writer/TripleFormat.java | 239 ++++++++++++++++
.../org/apache/any23/writer/TripleWriter.java | 60 ++++
.../any23/writer/TripleWriterFactory.java | 128 +++++++++
.../org/apache/any23/writer/WriterFactory.java | 45 ++-
.../any23/writer/WriterFactoryRegistry.java | 271 ++++++++++++-------
.../any23/configuration/SettingsTest.java | 227 ++++++++++++++++
.../apache/any23/writer/TripleFormatTest.java | 57 ++++
.../main/java/org/apache/any23/cli/Rover.java | 80 +++++-
.../apache/any23/cli/ExtractorsFlowTest.java | 93 +++++++
.../java/org/apache/any23/cli/RoverTest.java | 39 +++
.../apache/any23/cli/flows/PeopleExtractor.java | 113 ++++++++
.../any23/cli/flows/PeopleExtractorFactory.java | 39 +++
.../org.apache.any23.writer.WriterFactory | 1 +
.../org/apache/any23/writer/JSONLDWriter.java | 36 ++-
.../any23/writer/JSONLDWriterFactory.java | 22 +-
.../org/apache/any23/writer/JSONWriter.java | 57 ++--
.../apache/any23/writer/JSONWriterFactory.java | 30 +-
.../org/apache/any23/writer/NQuadsWriter.java | 35 ++-
.../any23/writer/NQuadsWriterFactory.java | 26 +-
.../org/apache/any23/writer/NTriplesWriter.java | 31 ++-
.../any23/writer/NTriplesWriterFactory.java | 26 +-
.../any23/writer/RDFWriterTripleHandler.java | 104 +++++--
.../org/apache/any23/writer/RDFXMLWriter.java | 31 ++-
.../any23/writer/RDFXMLWriterFactory.java | 26 +-
.../org/apache/any23/writer/TriXWriter.java | 29 +-
.../apache/any23/writer/TriXWriterFactory.java | 24 +-
.../any23/writer/TripleWriterHandler.java | 114 ++++++++
.../org/apache/any23/writer/TurtleWriter.java | 55 +++-
.../any23/writer/TurtleWriterFactory.java | 24 +-
.../org/apache/any23/writer/URIListWriter.java | 66 ++---
.../any23/writer/URIListWriterFactory.java | 21 +-
.../org/apache/any23/writer/WriterSettings.java | 59 ++++
.../org/apache/any23/writer/package-info.java | 2 +-
.../org/apache/any23/writer/JSONWriterTest.java | 40 ++-
.../apache/any23/writer/WriterRegistryTest.java | 16 +-
.../org/apache/any23/servlet/WebResponder.java | 18 +-
.../resources/cli/basic-with-stylesheet.html | 29 ++
41 files changed, 2446 insertions(+), 343 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/pom.xml
----------------------------------------------------------------------
diff --git a/api/pom.xml b/api/pom.xml
index ae275bd..748db36 100644
--- a/api/pom.xml
+++ b/api/pom.xml
@@ -43,6 +43,12 @@
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-rio-api</artifactId>
</dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/configuration/Setting.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/configuration/Setting.java b/api/src/main/java/org/apache/any23/configuration/Setting.java
new file mode 100644
index 0000000..6932afd
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/configuration/Setting.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+import java.lang.reflect.GenericArrayType;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.lang.reflect.TypeVariable;
+import java.util.HashMap;
+import java.util.Optional;
+import java.util.regex.Pattern;
+
+/**
+ * Represents a {@link Setting.Key Key} paired with a compatible value.
+ *
+ * @author Hans Brende (hansbrende@apache.org)
+ */
+public final class Setting<V> {
+
+ /**
+ * Convenience method for creating a new setting key with the specified identifier and value class.
+ * If the desired value type is a {@link ParameterizedType} such as {@code List<String>},
+ * or custom value-checking is required, then this method is not appropriate; instead,
+ * extend the {@link Key} class directly.
+ *
+ * @param identifier a unique identifier for this key
+ * @param valueType the type of value allowed by this key
+ * @return a new {@link Key} instance initialized with the specified identifier and value type
+ * @throws IllegalArgumentException if the identifier or value type is invalid
+ */
+ public static <V> Key<V> newKey(String identifier, Class<V> valueType) {
+ return new Key<V>(identifier, valueType) {};
+ }
+
+ /**
+ * Represents the key for a {@link Setting}.
+ */
+ public static abstract class Key<V> {
+ private final String identifier;
+ private final Type valueType;
+
+ private Key(String identifier, Class<V> valueType) {
+ this.identifier = checkIdentifier(identifier);
+ if ((this.valueType = valueType) == null) {
+ throw new IllegalArgumentException("value type cannot be null");
+ }
+
+ if (valueType.isArray()) {
+ throw new IllegalArgumentException(identifier + " value class must be immutable");
+ } else if (valueType.getTypeParameters().length != 0) {
+ throw new IllegalArgumentException(identifier + " setting key must fill in type parameters for " + valueType.toGenericString());
+ } else if (valueType.isPrimitive()) {
+ //ensure using primitive wrapper classes
+ //so that Class.isInstance(), etc. will work as expected
+ throw new IllegalArgumentException(identifier + " value class cannot be primitive");
+ }
+ }
+
+ private static final Pattern identifierPattern = Pattern.compile("[a-z][0-9a-z]*(\\.[a-z][0-9a-z]*)*");
+ private static String checkIdentifier(String identifier) {
+ if (identifier == null) {
+ throw new IllegalArgumentException("identifier cannot be null");
+ }
+ if (!identifierPattern.matcher(identifier).matches()) {
+ throw new IllegalArgumentException("identifier does not match " + identifierPattern.pattern());
+ }
+ return identifier;
+ }
+
+ /**
+ * Constructs a new key with the specified identifier.
+ * @param identifier the identifier for this key
+ * @throws IllegalArgumentException if the identifier is invalid, or the value type was determined to be invalid
+ */
+ protected Key(String identifier) {
+ this.identifier = checkIdentifier(identifier);
+
+ Type type = valueType = getValueType();
+
+ if (type instanceof Class) {
+ if (((Class) type).isArray()) {
+ throw new IllegalArgumentException(identifier + " value class must be immutable");
+ } else if (((Class) type).getTypeParameters().length != 0) {
+ throw new IllegalArgumentException(identifier + " setting key must fill in type parameters for " + ((Class) type).toGenericString());
+ }
+ } else if (type instanceof GenericArrayType) {
+ throw new IllegalArgumentException(identifier + " value class must be immutable");
+ } else if (type instanceof TypeVariable) {
+ throw new IllegalArgumentException("Invalid setting key type 'Key<" + type.getTypeName() + ">' for identifier " + identifier);
+ } else if (!(type instanceof ParameterizedType)) {
+ throw new IllegalArgumentException(identifier + " invalid key type " + type + " (" + type.getClass().getName() + ")");
+ }
+ }
+
+ private Type getValueType() {
+ HashMap<TypeVariable<?>, Type> mapping = new HashMap<>();
+ Class<?> rawType = getClass();
+ assert rawType != Key.class;
+ for (;;) {
+ Type superclass = rawType.getGenericSuperclass();
+ if (superclass instanceof ParameterizedType) {
+ rawType = (Class)((ParameterizedType) superclass).getRawType();
+ Type[] args = ((ParameterizedType) superclass).getActualTypeArguments();
+ if (Key.class.equals(rawType)) {
+ Type t = args[0];
+ return mapping.getOrDefault(t, t);
+ }
+ TypeVariable<?>[] vars = rawType.getTypeParameters();
+ for (int i = 0, len = vars.length; i < len; i++) {
+ Type t = args[i];
+ mapping.put(vars[i], t instanceof TypeVariable ? mapping.get(t) : t);
+ }
+ } else {
+ rawType = (Class<?>)superclass;
+ if (Key.class.equals(rawType)) {
+ throw new IllegalArgumentException(getClass() + " does not supply type arguments");
+ }
+ }
+ }
+ }
+
+ /**
+ * Subclasses may override this method to check that new settings for this key are valid.
+ * The default implementation of this method throws a {@link NullPointerException} if the new value is null and the initial value was non-null.
+ *
+ * @param initial the setting containing the initial value for this key, or null if the setting has not yet been initialized
+ * @param newValue the new value for this setting
+ * @throws Exception if the new value for this setting was invalid
+ */
+ protected void checkValue(Setting<V> initial, V newValue) throws Exception {
+ if (newValue == null && initial != null && initial.value != null) {
+ throw new NullPointerException();
+ }
+ }
+
+ private Setting<V> checked(Setting<V> origin, V value) {
+ try {
+ checkValue(origin, value);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("invalid value for key '" + identifier + "': " + value, e);
+ }
+ return new Setting<>(this, value);
+ }
+
+ /**
+ * @param value the value for the new setting
+ * @return a new {@link Setting} object with this key and the supplied value.
+ *
+ * @throws IllegalArgumentException if the new value was invalid, as determined by:
+ * <pre>
+ * {@code this.checkValue(null, value)}
+ * </pre>
+ *
+ * @see #checkValue(Setting, Object)
+ */
+ public final Setting<V> withValue(V value) {
+ return checked(null, value);
+ }
+
+ /**
+ * @param o the object to check for equality
+ * @return {@code this == o}
+ */
+ public final boolean equals(Object o) {
+ return super.equals(o);
+ }
+
+ /**
+ * @return the identity-based hashcode of this key
+ */
+ public final int hashCode() {
+ return super.hashCode();
+ }
+
+ public String toString() {
+ return identifier + ": " + valueType.getTypeName();
+ }
+ }
+
+ private final Key<V> key;
+ private final V value;
+
+ private Setting(Key<V> key, V value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ /**
+ * @return the identifier for this setting
+ */
+ public String getIdentifier() {
+ return key.identifier;
+ }
+
+ /**
+ * @return the value for this setting
+ */
+ public V getValue() {
+ return value;
+ }
+
+ /**
+ * @return the type of value supported for this setting
+ */
+ public Type getValueType() {
+ return key.valueType;
+ }
+
+ /**
+ * @param setting the setting to cast; may be null
+ * @return the supplied setting, if it has the same key (compared by identity) as this setting;
+ * otherwise, or if the supplied setting is null, an empty {@link Optional}
+ */
+ @SuppressWarnings("unchecked")
+ public final Optional<Setting<V>> cast(Setting<?> setting) {
+ return setting == null || setting.key != this.key ? Optional.empty() : Optional.of((Setting<V>)setting);
+ }
+
+ /**
+ * @param newValue the value for the new setting
+ * @return a new {@link Setting} object with this setting's {@link Key Key} and the supplied value.
+ *
+ * @throws IllegalArgumentException if the new value was invalid, as determined by:
+ * <pre>
+ * {@code this.key.checkValue(this, newValue)}
+ * </pre>
+ *
+ * @see Key#checkValue(Setting, Object)
+ */
+ public Setting<V> withValue(V newValue) {
+ return key.checked(this, newValue);
+ }
+
+ /**
+ * @return true if the supplied object is an instance of {@link Setting} and has the same key and value as this object.
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof Setting)) return false;
+
+ Setting<?> setting = (Setting<?>) o;
+
+ if (key != setting.key) return false;
+ return value != null ? value.equals(setting.value) : setting.value == null;
+ }
+
+ @Override
+ public int hashCode() {
+ return 31 * key.hashCode() + (value != null ? value.hashCode() : 0);
+ }
+
+ @Override
+ public String toString() {
+ return key.identifier + "=" + value;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/configuration/Settings.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/configuration/Settings.java b/api/src/main/java/org/apache/any23/configuration/Settings.java
new file mode 100644
index 0000000..1289be3
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/configuration/Settings.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * This class represents an <i>immutable</i> {@link Set} of {@link Setting} objects,
+ * with the additional property that no two settings having the same {@link Setting#getIdentifier() identifier}
+ * can be simultaneously present in a {@code Settings} object.
+ *
+ * @author Hans Brende (hansbrende@apache.org)
+ */
+public final class Settings extends AbstractSet<Setting<?>> {
+
+ private static final Settings EMPTY_SETTINGS = new Settings(Collections.emptyMap());
+
+ private final Map<String, Setting<?>> values;
+
+ private Settings(Map<String, Setting<?>> values) {
+ this.values = values;
+ }
+
+ /**
+ * Returns the setting with the same {@link Setting.Key Key} as the supplied setting, if present.
+ */
+ public <E> Optional<Setting<E>> find(Setting<E> setting) {
+ return setting.cast(values.get(setting.getIdentifier()));
+ }
+
+ /**
+ * Returns the value set for {@code defaultSetting}'s {@link Setting.Key Key}, if present.
+ * Otherwise, returns {@code defaultSetting}'s value.
+ * <br><br>
+ * This method is semantically equivalent to:
+ * <br><br>
+ * <pre>
+ * {@code find(defaultSetting).orElse(defaultSetting).getValue()}
+ * </pre>
+ */
+ public <E> E get(Setting<E> defaultSetting) {
+ return find(defaultSetting).orElse(defaultSetting).getValue();
+ }
+
+
+ ///////////////////////////////////////
+ // AbstractSet overrides
+ ///////////////////////////////////////
+
+ @Override
+ public boolean contains(Object o) {
+ if (!(o instanceof Setting<?>)) {
+ return false;
+ }
+ return o.equals(values.get(((Setting<?>) o).getIdentifier()));
+ }
+
+ @Override
+ public int size() {
+ return values.size();
+ }
+
+ @Override
+ public Iterator<Setting<?>> iterator() {
+ return values.values().iterator();
+ }
+
+ ///////////////////////////////////////
+ // public constructors
+ ///////////////////////////////////////
+
+ /**
+ * Returns an empty {@link Settings} object.
+ */
+ public static Settings of() {
+ return EMPTY_SETTINGS;
+ }
+
+ /**
+ * Returns a singleton {@link Settings} object, containing only the supplied setting.
+ */
+ public static Settings of(Setting<?> s) {
+ return new Settings(Collections.singletonMap(s.getIdentifier(), s));
+ }
+
+ /**
+ * Returns a {@link Settings} object containing the supplied settings.
+ * Settings with duplicate identifiers are rejected, not overwritten.
+ * @param settings the settings to include
+ * @throws IllegalArgumentException if any two settings have the same identifier
+ */
+ public static Settings of(Setting<?>... settings) {
+ Map<String, Setting<?>> map = mapForSize(settings.length);
+ for (Setting<?> s : settings) put(map, s);
+ return ofModifiable(map);
+ }
+
+ /**
+ * Returns a {@link Settings} object containing the supplied settings.
+ * If the supplied collection is already a {@link Settings} instance, it is returned as-is;
+ * an empty collection yields the shared empty instance.
+ * @param c the settings to include
+ * @throws IllegalArgumentException if any two settings have the same identifier
+ */
+ public static Settings of(Collection<? extends Setting<?>> c) {
+ if (c instanceof Settings) {
+ return (Settings)c;
+ }
+ int size = c.size();
+ if (size == 0) {
+ return EMPTY_SETTINGS;
+ }
+ Map<String, Setting<?>> map = mapForSize(size);
+ for (Setting<?> s : c) put(map, s);
+ return ofModifiable(map);
+ }
+
+ ///////////////////////////////////////
+ // Private static helpers
+ ///////////////////////////////////////
+
+ private static Settings ofModifiable(Map<String, Setting<?>> map) {
+ return new Settings(Collections.unmodifiableMap(map));
+ }
+
+ private static void put(Map<String, Setting<?>> map, Setting<?> setting) {
+ Setting<?> existing = map.put(setting.getIdentifier(), setting);
+ if (existing != null) {
+ throw new IllegalArgumentException(setting.getIdentifier() + " is already defined");
+ }
+ }
+
+ private static final float loadFactor = 0.75f;
+ private static Map<String, Setting<?>> mapForSize(int size) {
+ return new HashMap<>((int)(size / loadFactor) + 1, loadFactor);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/writer/DecoratingWriterFactory.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/writer/DecoratingWriterFactory.java b/api/src/main/java/org/apache/any23/writer/DecoratingWriterFactory.java
new file mode 100644
index 0000000..cc66372
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/writer/DecoratingWriterFactory.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import org.apache.any23.configuration.Settings;
+
+/**
+ * Base interface used for constructors of decorating {@link TripleHandler} implementations.
+ * @author Hans Brende (hansbrende@apache.org)
+ */
+public interface DecoratingWriterFactory extends BaseWriterFactory<TripleHandler> {
+
+ /**
+ *
+ * @return the settings supported by handlers produced by this factory
+ */
+ @Override
+ Settings getSupportedSettings();
+
+ /**
+ * @param delegate the {@link TripleHandler} to delegate input to
+ * @param settings the settings with which to configure the returned handler
+ * @return a {@link TripleHandler} which writes to the specified delegate
+ * @throws NullPointerException if the delegate or settings is null
+ * @throws IllegalArgumentException if the settings are not correctly configured
+ */
+ @Override
+ TripleHandler getTripleWriter(TripleHandler delegate, Settings settings);
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/writer/TripleFormat.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/writer/TripleFormat.java b/api/src/main/java/org/apache/any23/writer/TripleFormat.java
new file mode 100644
index 0000000..01292eb
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/writer/TripleFormat.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.rio.RDFFormat;
+
+import java.nio.charset.Charset;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
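+/**
+ * Describes an output format for triples: a display name, one or more mimetypes,
+ * an optional default charset, zero or more file extensions, an optional standard IRI,
+ * and the {@link Capabilities} of the format.
+ *
+ * @author Hans Brende (hansbrende@apache.org)
+ */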
+public class TripleFormat {
+ private final String name;
+ private final IRI standardIRI;
+ private final List<String> mimeTypes;
+ private final Charset charset;
+ private final List<String> fileExtensions;
+ private final Capabilities capabilities;
+ RDFFormat rdfFormat;
+
+ private static final ValueFactory vf = SimpleValueFactory.getInstance();
+
+ private static final int WRITES_TRIPLES = 1;
+ private static final int WRITES_GRAPHS = 1 << 1;
+ private static final int WRITES_NAMESPACES = 1 << 2;
+
+ public static final Capabilities NONSTANDARD = new Capabilities(0);
+ public static final Capabilities TRIPLES = new Capabilities(WRITES_TRIPLES);
+ public static final Capabilities QUADS = new Capabilities(WRITES_TRIPLES | WRITES_GRAPHS);
+ public static final Capabilities TRIPLES_AND_NAMESPACES = TRIPLES.withNamespaces();
+ public static final Capabilities QUADS_AND_NAMESPACES = QUADS.withNamespaces();
+
+ public static class Capabilities {
+ private final int raw;
+
+ private Capabilities(int raw) {
+ this.raw = raw;
+ }
+
+ public boolean has(Capabilities other) {
+ int oraw = other.raw;
+ return (raw & oraw) == oraw;
+ }
+
+ private Capabilities withNamespaces() {
+ return new Capabilities(raw | WRITES_NAMESPACES);
+ }
+
+ //TODO: add "supportsComments()"
+ }
+
+ private static IllegalArgumentException mimeTypeErr(String mt) {
+ return new IllegalArgumentException(mt + " is not a valid mimetype");
+ }
+
+ private static IllegalArgumentException extensionErr(String ext) {
+ return new IllegalArgumentException(ext + " is not a valid extension");
+ }
+
+ private static <E> E checkNonNull(E object, String name) {
+ if (object == null) {
+ throw new IllegalArgumentException(name + " must not be null");
+ }
+ return object;
+ }
+
+ //see https://tools.ietf.org/html/rfc2045#section-5.1
+ private static void checkMimeTypes(List<String> mts) {
+ if (checkNonNull(mts, "mimetypes").isEmpty()) {
+ throw new IllegalArgumentException("mimetypes must not be empty");
+ }
+ for (String mt : mts) {
+ boolean slash = false;
+ for (int i = 0, len = checkNonNull(mt, "mimetype").length(); i < len; i++) {
+ char ch = mt.charAt(i);
+ if (ch <= ' ' || ch >= 127 || ch == '(' || ch == ')' ||
+ ch == '<' || ch == '>' || ch == '@' || ch == ',' ||
+ ch == ';' || ch == ':' || ch == '\\' || ch == '"' ||
+ ch == '[' || ch == ']' || ch == '?' || ch == '='
+ //also disallow wildcards:
+ || ch == '*') {
+ throw mimeTypeErr(mt);
+ } else if (ch == '/') {
+ if (slash || i == 0 || i + 1 == len) {
+ throw mimeTypeErr(mt);
+ }
+ slash = true;
+ }
+ }
+ if (!slash) {
+ throw mimeTypeErr(mt);
+ }
+ }
+ }
+
+ private static void checkExtensions(List<String> exts) {
+ for (String ext : checkNonNull(exts, "extensions")) {
+ int illegalDot = 0;
+ for (int i = 0, len = checkNonNull(ext, "extension").length(); i < len; i++) {
+ char ch = ext.charAt(i);
+ if (ch <= ' ' || ch >= 127 || ch == '<' || ch == '>' ||
+ ch == ':' || ch == '"' || ch == '/' || ch == '\\' ||
+ ch == '|' || ch == '?' || ch == '*') {
+ throw extensionErr(ext);
+ } else if (ch == '.') {
+ int next = i + 1;
+ if (i == illegalDot || next == len) {
+ throw extensionErr(ext);
+ }
+ illegalDot = next;
+ }
+ }
+ }
+ }
+
+ private static String normalizeMimeType(String mt) {
+ return mt.toLowerCase(Locale.ENGLISH);
+ }
+
+ private static String normalizeExtension(String ext) {
+ return ext.toLowerCase(Locale.ENGLISH);
+ }
+
+ private TripleFormat(String name, Collection<String> mimeTypes, Charset charset,
+ Collection<String> fileExtensions, String standardIRI, Capabilities capabilities) {
+ this.name = checkNonNull(name, "display name");
+ checkMimeTypes(this.mimeTypes = Collections.unmodifiableList(mimeTypes.stream()
+ .map(TripleFormat::normalizeMimeType).distinct().collect(Collectors.toList())));
+ if ((this.charset = charset) != null && !charset.canEncode()) {
+ throw new IllegalArgumentException(charset + " does not allow encoding");
+ }
+ checkExtensions(this.fileExtensions = Collections.unmodifiableList(fileExtensions.stream()
+ .map(TripleFormat::normalizeExtension).distinct().collect(Collectors.toList())));
+ this.standardIRI = standardIRI == null ? null : vf.createIRI(standardIRI);
+ this.capabilities = checkNonNull(capabilities, "capabilities");
+ }
+
+ public static TripleFormat of(String displayName, Collection<String> mimeTypes, Charset defaultCharset,
+ Collection<String> fileExtensions, String standardIRI, Capabilities capabilities) {
+ return new TripleFormat(displayName, mimeTypes, defaultCharset, fileExtensions, standardIRI, capabilities);
+ }
+
+ public Optional<Charset> getCharset() {
+ return Optional.ofNullable(charset);
+ }
+
+ static Capabilities capabilities(RDFFormat format) {
+ if (format.supportsContexts()) {
+ return format.supportsNamespaces() ? QUADS_AND_NAMESPACES : QUADS;
+ } else {
+ return format.supportsNamespaces() ? TRIPLES_AND_NAMESPACES : TRIPLES;
+ }
+ }
+
+ private static String iri(IRI iri) {
+ return iri == null ? null : iri.stringValue();
+ }
+
+ static TripleFormat of(RDFFormat format) {
+ TripleFormat f = of(format.getName(), format.getMIMETypes(),
+ format.getCharset(), format.getFileExtensions(), iri(format.getStandardURI()),
+ capabilities(format));
+ f.rdfFormat = format;
+ return f;
+ }
+
+ RDFFormat toRDFFormat() {
+ RDFFormat fmt = rdfFormat;
+ if (fmt != null) {
+ return fmt;
+ }
+ Capabilities capabilities = this.capabilities;
+ if (!capabilities.has(TRIPLES)) {
+ throw new UnsupportedOperationException("This format does not print RDF triples");
+ }
+ return rdfFormat = new RDFFormat(name, mimeTypes, charset, fileExtensions, standardIRI,
+ capabilities.has(TRIPLES_AND_NAMESPACES), capabilities.has(QUADS));
+ }
+
+ public Optional<IRI> getStandardIRI() {
+ return Optional.ofNullable(standardIRI);
+ }
+
+ public List<String> getMimeTypes() {
+ return mimeTypes;
+ }
+
+ public String getMimeType() {
+ return mimeTypes.get(0);
+ }
+
+ public List<String> getExtensions() {
+ return fileExtensions;
+ }
+
+ public Optional<String> getExtension() {
+ return fileExtensions.isEmpty() ? Optional.empty() : Optional.of(fileExtensions.get(0));
+ }
+
+ public Capabilities getCapabilities() {
+ return capabilities;
+ }
+
+ public String getDisplayName() {
+ return name;
+ }
+
+ public String toString() {
+ return name + mimeTypes.stream().collect(
+ Collectors.joining(", ", " (mimeTypes=", "; "))
+ + fileExtensions.stream().collect(
+ Collectors.joining(", ", "ext=", ")"));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/writer/TripleWriter.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/writer/TripleWriter.java b/api/src/main/java/org/apache/any23/writer/TripleWriter.java
new file mode 100644
index 0000000..3800045
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/writer/TripleWriter.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+
+/**
+ * Base interface for triple writers that don't need an extraction context to write triples
+ *
+ * @author Hans Brende (hansbrende@apache.org)
+ */
+public interface TripleWriter extends AutoCloseable {
+
+ /**
+ * Writes a triple and, optionally, a graph resource name.
+ * @param s the subject to write
+ * @param p the predicate to write
+ * @param o the object to write
+ * @param g the graph name to write, or null
+ * @throws TripleHandlerException if there is an error writing the triple
+ */
+ void writeTriple(Resource s, IRI p, Value o, Resource g) throws TripleHandlerException;
+
+ /**
+ * Writes a prefix-namespace mapping. <br><b>NOTE:</b> this method should be called
+ * <b>before</b> writing out any triples. Calling this method <b>after</b> writing
+ * out a triple may result in the prefix-namespace mapping being ignored.
+ * @param prefix the namespace prefix
+ * @param uri the namespace uri
+ * @throws TripleHandlerException if there was an error writing out the prefix-namespace mapping
+ */
+ void writeNamespace(String prefix, String uri) throws TripleHandlerException;
+
+ /**
+ * Releases resources associated with this {@link TripleWriter}, and flushes (but by default does not close)
+ * any underlying {@link java.io.OutputStream}s. Future invocations of methods of this writer
+ * produce <b>undefined behavior</b> after this method has been called.
+ * @throws TripleHandlerException if there was an error closing this {@link TripleWriter}
+ */
+ @Override
+ void close() throws TripleHandlerException;
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/writer/TripleWriterFactory.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/writer/TripleWriterFactory.java b/api/src/main/java/org/apache/any23/writer/TripleWriterFactory.java
new file mode 100644
index 0000000..20d4995
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/writer/TripleWriterFactory.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import org.apache.any23.configuration.Settings;
+import org.apache.any23.extractor.ExtractionContext;
+import org.eclipse.rdf4j.common.lang.FileFormat;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.rio.RDFFormat;
+
+import java.io.OutputStream;
+
+/**
+ * Base interface for constructors of {@link TripleHandler} implementations
+ * that write to an {@link OutputStream} using a particular {@link FileFormat}.
+ * <p>The deprecated {@link WriterFactory} accessors receive default implementations here that are
+ * derived from {@link #getTripleFormat()} and {@link #getTripleWriter(OutputStream, Settings)},
+ * so implementors only have to provide the non-deprecated methods and an identifier.
+ * @author Hans Brende (hansbrende@apache.org)
+ */
+public interface TripleWriterFactory extends BaseWriterFactory<OutputStream> {
+
+ /**
+ * @return the result of calling {@link TripleFormat#toRDFFormat()} on {@link #getTripleFormat()}
+ * @deprecated since 2.3. Use {@link #getTripleFormat()} instead.
+ */
+ @Override
+ @Deprecated
+ default RDFFormat getRdfFormat() {
+ return getTripleFormat().toRDFFormat();
+ }
+
+ /**
+ * @return the format used to write to {@link OutputStream}s
+ */
+ TripleFormat getTripleFormat();
+
+ /**
+ * @return the MIME type reported by {@link #getTripleFormat()}
+ * @deprecated since 2.3. Use {@link #getTripleFormat()}.{@link TripleFormat#getMimeType() getMimeType()} instead.
+ */
+ @Override
+ @Deprecated
+ default String getMimeType() {
+ return getTripleFormat().getMimeType();
+ }
+
+ /**
+ * Obtains a handler from {@link #getTripleWriter(OutputStream, Settings)}, passing <code>Settings.of()</code>,
+ * and exposes it as a {@link FormatWriter}. A handler that already is a {@link FormatWriter} is returned
+ * unchanged; any other handler is wrapped in an adapter that forwards every {@link TripleHandler} call to it,
+ * always answers <code>false</code> from <code>isAnnotated()</code> and ignores <code>setAnnotated</code>.
+ * @param os the {@link OutputStream} to write to
+ * @return the handler itself, or a delegating {@link FormatWriter} wrapped around it
+ * @deprecated since 2.3. Use {@link #getTripleWriter(OutputStream, Settings)} instead.
+ */
+ @Override
+ @Deprecated
+ default FormatWriter getRdfWriter(OutputStream os) {
+ TripleHandler th = getTripleWriter(os, Settings.of());
+ return th instanceof FormatWriter ? (FormatWriter)th : new FormatWriter() { // reuse th when possible, otherwise adapt it
+ @Override
+ public boolean isAnnotated() {
+ return false; // the adapter never reports itself as annotated
+ }
+ @Override
+ public void setAnnotated(boolean f) {} // no-op: the flag is discarded
+ @Override
+ public void startDocument(IRI documentIRI) throws TripleHandlerException {
+ th.startDocument(documentIRI);
+ }
+ @Override
+ public void openContext(ExtractionContext context) throws TripleHandlerException {
+ th.openContext(context);
+ }
+ @Override
+ public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context) throws TripleHandlerException {
+ th.receiveTriple(s, p, o, g, context);
+ }
+ @Override
+ public void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException {
+ th.receiveNamespace(prefix, uri, context);
+ }
+ @Override
+ public void closeContext(ExtractionContext context) throws TripleHandlerException {
+ th.closeContext(context);
+ }
+ @Override
+ public void endDocument(IRI documentIRI) throws TripleHandlerException {
+ th.endDocument(documentIRI);
+ }
+ @Override
+ public void setContentLength(long contentLength) {
+ th.setContentLength(contentLength);
+ }
+ @Override
+ public void close() throws TripleHandlerException {
+ th.close();
+ }
+ };
+ }
+
+
+ /**
+ * Redeclared from the parent interface without changing its signature.
+ * @return the settings supported by writers produced by this factory
+ */
+ @Override
+ Settings getSupportedSettings();
+
+
+ /**
+ * Redeclared from the parent interface with the output type fixed to {@link OutputStream}.
+ * @param out the {@link OutputStream} to write to
+ * @param settings the settings with which to configure the writer
+ * @return a {@link TripleHandler} which writes to the specified {@link OutputStream}
+ * @throws NullPointerException if the output stream or settings is null
+ * @throws IllegalArgumentException if the settings are not correctly configured
+ */
+ @Override
+ TripleHandler getTripleWriter(OutputStream out, Settings settings);
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/writer/WriterFactory.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/writer/WriterFactory.java b/api/src/main/java/org/apache/any23/writer/WriterFactory.java
index 3012beb..060177b 100644
--- a/api/src/main/java/org/apache/any23/writer/WriterFactory.java
+++ b/api/src/main/java/org/apache/any23/writer/WriterFactory.java
@@ -19,18 +19,59 @@ package org.apache.any23.writer;
import java.io.OutputStream;
+import org.apache.any23.configuration.Settings;
import org.eclipse.rdf4j.rio.RDFFormat;
/**
- * @author Peter Ansell p_ansell@yahoo.com
- *
+ * The superinterface of all {@link TripleHandler} factory interfaces.
+ * Do not implement this interface directly. Instead, implement one of the subinterfaces {@link TripleWriterFactory} or {@link DecoratingWriterFactory}.
+ * @author Peter Ansell (p_ansell@yahoo.com)
+ * @author Hans Brende (hansbrende@apache.org)
*/
public interface WriterFactory {
+
+ /**
+ * @return the {@link RDFFormat} associated with this factory
+ * @deprecated since 2.3. Use {@link TripleWriterFactory#getTripleFormat()} instead.
+ */
+ @Deprecated
RDFFormat getRdfFormat();
String getIdentifier();
+ /**
+ * @return the MIME type associated with this factory
+ * @deprecated since 2.3. Use {@link TripleWriterFactory#getTripleFormat()}.{@link TripleFormat#getMimeType() getMimeType()} instead.
+ */
+ @Deprecated
String getMimeType();
+ /**
+ * @param os the output stream handed to the created writer
+ * @return a {@link FormatWriter} for the given output stream
+ * @deprecated since 2.3. Use {@link TripleWriterFactory#getTripleWriter(OutputStream, Settings)} instead.
+ */
+ @Deprecated
FormatWriter getRdfWriter(OutputStream os);
}
+
+interface BaseWriterFactory<Output> extends WriterFactory { // package-private parent of the Settings-based factories; Output is the argument type of getTripleWriter (OutputStream for TripleWriterFactory)
+
+ Settings getSupportedSettings(); // settings understood by the handlers this factory creates
+
+ TripleHandler getTripleWriter(Output output, Settings settings); // creates a handler for the given output, configured by the given settings
+
+ @Override
+ @Deprecated
+ default FormatWriter getRdfWriter(OutputStream os) {
+ throw new UnsupportedOperationException("this class does not support getRdfWriter()"); // legacy accessor: unsupported unless a subinterface overrides it (TripleWriterFactory does)
+ }
+
+ @Override
+ @Deprecated
+ default String getMimeType() {
+ throw new UnsupportedOperationException("this class does not support getMimeType()"); // legacy accessor: unsupported unless a subinterface overrides it (TripleWriterFactory does)
+ }
+
+ @Override
+ @Deprecated
+ default RDFFormat getRdfFormat() {
+ throw new UnsupportedOperationException("this class does not support getRdfFormat()"); // legacy accessor: unsupported unless a subinterface overrides it (TripleWriterFactory does)
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java b/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
index cbe5f9a..64830d8 100644
--- a/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
+++ b/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
@@ -19,15 +19,21 @@ package org.apache.any23.writer;
import java.io.OutputStream;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.ServiceConfigurationError;
import java.util.ServiceLoader;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.CopyOnWriteArraySet;
+import org.apache.any23.configuration.Settings;
+import org.eclipse.rdf4j.rio.RDFFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -35,6 +41,7 @@ import org.slf4j.LoggerFactory;
* Registry class for {@link WriterFactory}s.
*
* @author Michele Mostarda (mostarda@fbk.eu)
+ * @author Hans Brende (hansbrende@apache.org)
*/
public class WriterFactoryRegistry {
@@ -43,54 +50,54 @@ public class WriterFactoryRegistry {
/**
* Singleton instance.
*/
- private static WriterFactoryRegistry instance;
+ private static class InstanceHolder {
+ private static final WriterFactoryRegistry instance = new WriterFactoryRegistry();
+ }
+
+ private static final WriterFactory[] EMPTY_WRITERS = new WriterFactory[0];
/**
* List of registered writers.
*/
- private final List<WriterFactory> writers =
- new ArrayList<>();
+ private final List<WriterFactory> writers = new CopyOnWriteArrayList<>();
/**
- * MIME Type to {@link FormatWriter} class.
+ * MIME Type to {@link WriterFactory} class.
*/
- private final Map<String,List<WriterFactory>> mimeToWriter =
- new HashMap<>();
+ private final Map<String, List<WriterFactory>> mimeToWriter = Collections.synchronizedMap(new HashMap<>());
/**
- * Identifier to {@link FormatWriter} class.
+ * Identifier to {@link WriterFactory} class.
*/
- private final Map<String,WriterFactory> idToWriter =
- new HashMap<>();
+ private final Map<String, WriterFactory> idToWriter = new HashMap<>();
- private List<String> identifiers = new ArrayList<>();
+ private final List<String> identifiers = new CopyOnWriteArrayList<>();
+
+ private final Collection<String> mimeTypes = new CopyOnWriteArraySet<>();
public WriterFactoryRegistry() {
- ServiceLoader<WriterFactory> serviceLoader = java.util.ServiceLoader.load(WriterFactory.class, this.getClass().getClassLoader());
-
- Iterator<WriterFactory> iterator = serviceLoader.iterator();
+ ServiceLoader<WriterFactory> serviceLoader = java.util.ServiceLoader.load(WriterFactory.class, this.getClass().getClassLoader()); // look up WriterFactory providers through this class's class loader
+
+ Iterator<WriterFactory> iterator = serviceLoader.iterator();
// use while(true) loop so that we can isolate all service loader errors from .next and .hasNext to a single service
- while(true)
- {
- try
- {
- if(!iterator.hasNext())
- break;
-
- WriterFactory factory = iterator.next();
-
- this.register(factory);
- }
- catch(ServiceConfigurationError error)
- {
- LOG.error("Found error loading a WriterFactory", error);
- }
- }
+
+ ArrayList<WriterFactory> factories = new ArrayList<>(); // collect first so that everything is registered in one batch below
+ while (true) {
+ try {
+ if (!iterator.hasNext())
+ break;
+ factories.add(iterator.next());
+ } catch(ServiceConfigurationError error) {
+ LOG.error("Found error loading a WriterFactory", error); // log the failure and carry on with the next loop iteration
+ }
+ }
+
+ registerAll(factories.toArray(EMPTY_WRITERS)); // single bulk registration of every factory that loaded
}
/**
- * Reads the identifier specified for the given {@link FormatWriter}.
+ * Reads the identifier specified for the given {@link WriterFactory}.
*
* @param writerClass writer class.
* @return identifier.
@@ -100,97 +107,182 @@ public class WriterFactoryRegistry {
}
/**
- * Reads the <i>MIME Type</i> specified for the given {@link FormatWriter}.
+ * Reads the <i>MIME Type</i> specified for the given {@link WriterFactory}.
+ * Returns <code>null</code> for a {@link DecoratingWriterFactory}. For a factory that implements neither
+ * {@link TripleWriterFactory} nor {@link DecoratingWriterFactory}, a warning is logged and the MIME type
+ * is taken from a format derived from the factory's deprecated accessors.
*
* @param writerClass writer class.
* @return MIME type.
*/
public static String getMimeType(WriterFactory writerClass) {
- return writerClass.getMimeType();
+ if (writerClass instanceof TripleWriterFactory) {
+ return ((TripleWriterFactory)writerClass).getTripleFormat().getMimeType(); // preferred path: ask the TripleFormat
+ } else if (writerClass instanceof DecoratingWriterFactory) {
+ return null; // no MIME type is reported for decorating factories
+ } else {
+ return reportAndGetCompatFormat(writerClass).getMimeType(); // legacy implementation: warn, then use the compatibility format
+ }
}
/**
+ * The instance is created when the nested <code>InstanceHolder</code> class is initialized.
* @return the {@link WriterFactoryRegistry} singleton instance.
*/
- public static synchronized WriterFactoryRegistry getInstance() {
- if(instance == null) {
- instance = new WriterFactoryRegistry();
+ public static WriterFactoryRegistry getInstance() {
+ return InstanceHolder.instance; // static final field of the holder class, so no locking is done here
+ }
+
+ @SuppressWarnings("deprecation")
+ private static TripleFormat reportAndGetCompatFormat(WriterFactory f) { // builds a TripleFormat for a legacy factory from its deprecated getMimeType()/getRdfFormat()
+ LOG.warn("{} must implement either {} or {}.", f.getClass(), TripleWriterFactory.class, DecoratingWriterFactory.class); // logged on every call for such a factory
+ final String mimeType = f.getMimeType();
+ RDFFormat fmt;
+ try {
+ fmt = f.getRdfFormat();
+ } catch (RuntimeException e) { // getRdfFormat() failed: describe the format by its MIME type alone (also passed as the first argument)
+ return TripleFormat.of(mimeType, Collections.singleton(mimeType), null,
+ Collections.emptySet(), null, TripleFormat.NONSTANDARD);
}
- return instance;
+ if (mimeType == null || fmt.hasDefaultMIMEType(mimeType)) { // nothing to override: wrap the RDFFormat as it is
+ return TripleFormat.of(fmt);
+ }
+ //override default MIME type on mismatch
+ return TripleFormat.of(fmt.getName(), Collections.singleton(mimeType), fmt.getCharset(),
+ fmt.getFileExtensions(), fmt.getStandardURI().stringValue(), TripleFormat.capabilities(fmt)); // NOTE(review): throws NullPointerException if getStandardURI() returns null - confirm whether that can happen for the formats in use
+ }
+
+ private static TripleWriterFactory getCompatFactory(WriterFactory f) { // presents a legacy WriterFactory as a TripleWriterFactory
+ final TripleFormat format = reportAndGetCompatFormat(f); // computed once here; this call also logs the warning
+ return new TripleWriterFactory() {
+ @Override
+ public TripleFormat getTripleFormat() {
+ return format;
+ }
+
+ @Override
+ @SuppressWarnings("deprecation")
+ public TripleHandler getTripleWriter(OutputStream os, Settings settings) {
+ return f.getRdfWriter(os); // the settings argument is not used: the legacy API only takes the stream
+ }
+
+ @Override
+ public Settings getSupportedSettings() {
+ return Settings.of(); // Settings.of() with no arguments: no settings are advertised
+ }
+
+ @Override
+ public String getIdentifier() {
+ return f.getIdentifier(); // same identifier as the wrapped factory
+ }
+ };
+ }
/**
* Registers a new {@link WriterFactory} to the registry.
*
- * @param writerClass the class of the writer to be registered.
+ * @param f the writer factory to be registered.
+ * @throws NullPointerException if <code>f</code> is <code>null</code>.
* @throws IllegalArgumentException if the id or the mimetype are null
* or empty strings or if the identifier has been already defined.
*/
- public synchronized void register(WriterFactory writerClass) {
- if(writerClass == null)
+ public void register(WriterFactory f) {
+ if (f == null)
throw new NullPointerException("writerClass cannot be null.");
- final String id = writerClass.getIdentifier();
- final String mimeType = writerClass.getMimeType();
- if(id == null || id.trim().length() == 0) {
- throw new IllegalArgumentException("Invalid identifier returned by writer " + writerClass);
+ registerAll(new WriterFactory[]{f}); // validation and registration are done by the batch method, here with a batch of one
+ }
+
+ private void registerAll(WriterFactory[] factories) { // three phases: validate the whole batch, reserve the identifiers, then publish to the lookup collections
+ final int count = factories.length;
+ if (count == 0) {
+ return;
}
- if(mimeType == null || mimeType.trim().length() == 0) {
- throw new IllegalArgumentException("Invalid MIME type returned by writer " + writerClass);
+ final HashMap<String, ArrayList<WriterFactory>> mimes = new HashMap<>(); // MIME type -> factories of this batch, merged into mimeToWriter at the end
+ final String[] ids = new String[count];
+
+ for (int i = 0; i < count; i++) { // phase 1: validation; a failure here throws before anything has been registered
+ WriterFactory f = factories[i];
+ if (!(f instanceof BaseWriterFactory<?>)) {
+ //backwards compatibility: view vanilla WriterFactory as TripleWriterFactory
+ f = factories[i] = getCompatFactory(f); // the array slot is replaced too, so the adapter is what gets registered
+ }
+ final String id = ids[i] = f.getIdentifier();
+ if (id == null || id.trim().isEmpty()) {
+ throw new IllegalArgumentException("Invalid identifier returned by writer " + f);
+ }
+ if (f instanceof TripleWriterFactory) { // only TripleWriterFactory instances are indexed by MIME type
+ String mimeType = ((TripleWriterFactory)f).getTripleFormat().getMimeType();
+ if (mimeType == null || mimeType.trim().isEmpty()) {
+ throw new IllegalArgumentException("Invalid MIME type returned by writer " + f);
+ }
+ mimes.computeIfAbsent(mimeType, k -> new ArrayList<>()).add(f);
+ }
+ }
+
+ final List<String> idList = Arrays.asList(ids);
+ final List<WriterFactory> factoryList = Arrays.asList(factories);
+ final Map<String, WriterFactory> idToWriter;
+ synchronized (idToWriter = this.idToWriter) { // phase 2: idToWriter is a plain HashMap, so it is guarded by its own monitor
+ for (int i = 0; i < count; i++) {
+ String id = ids[i];
+ if (idToWriter.putIfAbsent(id, factories[i]) != null) {
+ idToWriter.keySet().removeAll(idList.subList(0, i)); // roll back the identifiers this batch added before the clash
+ throw new IllegalArgumentException("The writer identifier is already declared: " + id);
+ }
+ }
}
- if(idToWriter.containsKey(id))
- throw new IllegalArgumentException("The writer identifier is already declared.");
-
- writers.add(writerClass);
- identifiers.add(writerClass.getIdentifier());
- List<WriterFactory> writerClasses = mimeToWriter.get(mimeType);
- if(writerClasses == null) {
- writerClasses = new ArrayList<>();
- mimeToWriter.put(mimeType, writerClasses);
+ //add in bulk to reduce writes to CopyOnWriteArrayList
+ writers.addAll(factoryList); // phase 3: publish; these updates happen outside the idToWriter monitor
+ identifiers.addAll(idList);
+ for (Map.Entry<String, ArrayList<WriterFactory>> entry : mimes.entrySet()) {
+ String mimeType = entry.getKey();
+ mimeTypes.add(mimeType);
+ mimeToWriter.computeIfAbsent(mimeType, k -> new CopyOnWriteArrayList<>()).addAll(entry.getValue()); // per-MIME lists are CopyOnWriteArrayLists
}
- writerClasses.add(writerClass);
- idToWriter.put(id, writerClass);
}
/**
- * Verifies if a {@link FormatWriter} with given <code>id</code> identifier has been registered.
+ * Verifies if a {@link WriterFactory} with given <code>id</code> identifier has been registered.
*
* @param id identifier.
* @return <code>true</code> if the identifier has been registered, <code>false</code> otherwise.
*/
- public synchronized boolean hasIdentifier(String id) {
- return idToWriter.containsKey(id);
+ public boolean hasIdentifier(String id) {
+ synchronized (idToWriter) { // idToWriter is a plain HashMap; registerAll modifies it under this same monitor
+ return idToWriter.containsKey(id);
+ }
}
/**
+ * The result is an unmodifiable view backed by the registry's own list, not a copy.
* @return the list of all the specified identifiers.
*/
- public synchronized List<String> getIdentifiers() {
+ public List<String> getIdentifiers() {
+ //no synchronized block needed for CopyOnWriteArrayList
return Collections.unmodifiableList(identifiers);
}
/**
+ * The result is an unmodifiable view backed by the registry's own set of MIME types, not a copy.
- * @return the list of MIME types covered by the registered {@link FormatWriter}s.
+ * @return the list of MIME types covered by the registered {@link WriterFactory} instances.
*/
- public synchronized Collection<String> getMimeTypes() {
- return Collections.unmodifiableCollection(mimeToWriter.keySet());
+ public Collection<String> getMimeTypes() {
+ //no synchronized block needed for CopyOnWriteArraySet
+ return Collections.unmodifiableCollection(mimeTypes);
}
/**
+ * The result is an unmodifiable view backed by the registry's own list, not a copy.
+ * A factory registered as a plain {@link WriterFactory} appears here as the
+ * {@link TripleWriterFactory} adapter that was created for it at registration time.
- * @return the list of all the registered {@link FormatWriter}s.
+ * @return the list of all the registered {@link WriterFactory} instances.
*/
- public synchronized List<WriterFactory> getWriters() {
+ public List<WriterFactory> getWriters() {
+ //no synchronized block needed for CopyOnWriteArrayList
return Collections.unmodifiableList(writers);
}
/**
- * Returns the {@link FormatWriter} identified by <code>id</code>.
+ * Returns the {@link WriterFactory} identified by <code>id</code>.
*
* @param id the writer identifier.
- * @return the class of the {@link FormatWriter} matching the <code>id</code>
- * or <code>null</code> if not found.s
+ * @return the {@link WriterFactory} matching the <code>id</code>
+ * or <code>null</code> if not found.
*/
- public synchronized WriterFactory getWriterByIdentifier(String id) {
- return idToWriter.get(id);
+ public WriterFactory getWriterByIdentifier(String id) {
+ synchronized (idToWriter) { // idToWriter is a plain HashMap; registerAll modifies it under this same monitor
+ return idToWriter.get(id);
+ }
}
/**
@@ -199,42 +291,29 @@ public class WriterFactoryRegistry {
* @param mimeType a MIMEType.
* @return a list of matching writers or an empty list.
*/
- public synchronized Collection<WriterFactory> getWritersByMimeType(String mimeType) {
- return mimeToWriter.get(mimeType);
+ public Collection<WriterFactory> getWritersByMimeType(String mimeType) {
+ //no synchronized block needed for synchronized map
+ //the stored lists are CopyOnWriteArrayLists, so iterating the returned view avoids ConcurrentModificationExceptions
+ List<WriterFactory> list = mimeToWriter.get(mimeType);
+ return list != null ? Collections.unmodifiableList(list) : Collections.emptyList(); // unmodifiable view of the stored list, or an empty list for an unknown MIME type
}
/**
- * Returns an instance of {@link FormatWriter} ready to write on the given <code>os</code>
+ * Returns an instance of {@link FormatWriter} ready to write on the given
* {@link OutputStream}.
*
- * @param id the identifier of the {@link FormatWriter} to crate an instance.
+ * @param id the identifier of the {@link FormatWriter} to instantiate.
* @param os the output stream.
* @return the not <code>null</code> {@link FormatWriter} instance.
* @throws NullPointerException if the <code>id</code> doesn't match any registered writer.
+ * @throws UnsupportedOperationException if the matching factory keeps the default <code>getRdfWriter</code>
+ * implementation of <code>BaseWriterFactory</code>, which always throws.
- */
- public synchronized FormatWriter getWriterInstanceByIdentifier(String id, OutputStream os) {
- final WriterFactory writerClazz = getWriterByIdentifier(id);
- if(writerClazz == null)
- throw new NullPointerException(
- String.format("Cannot find writer with id '%s' .", id)
- );
- return createWriter(writerClazz, os);
- }
-
- /**
- * Crates a writer instance.
*
- * @param clazz class to instantiate.
- * @param os output stream to pass as constructor argument.
- * @return created instance.
- * @throws IllegalArgumentException if an error occurs during instantiation.
+ * @deprecated since 2.3. Use {@link #getWriterByIdentifier(String)}
+ * in combination with {@link TripleWriterFactory#getTripleWriter(OutputStream, Settings)} instead.
*/
- private FormatWriter createWriter(WriterFactory clazz, OutputStream os) {
- try {
- return clazz.getRdfWriter(os);
- } catch (Exception e) {
- throw new IllegalArgumentException("Error while initializing format writer " + clazz + " .", e);
- }
+ @Deprecated
+ public FormatWriter getWriterInstanceByIdentifier(String id, OutputStream os) {
+ return Objects.requireNonNull(getWriterByIdentifier(id),
+ "Cannot find writer with id " + id).getRdfWriter(os); // exceptions from getRdfWriter are no longer wrapped in IllegalArgumentException
}
}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/test/java/org/apache/any23/configuration/SettingsTest.java
----------------------------------------------------------------------
diff --git a/api/src/test/java/org/apache/any23/configuration/SettingsTest.java b/api/src/test/java/org/apache/any23/configuration/SettingsTest.java
new file mode 100644
index 0000000..a5a7b6e
--- /dev/null
+++ b/api/src/test/java/org/apache/any23/configuration/SettingsTest.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.configuration;
+
+import org.junit.Test;
+
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+@SuppressWarnings("ResultOfMethodCallIgnored")
+public class SettingsTest { // tests for Setting keys/values and for Settings lookup
+
+ @Test
+ public void testNonNullSetting() { // a setting first given a non-null value must reject a later null value
+ Setting<String> nonNull = Setting.newKey("nulltest", String.class).withValue("A nonnull string");
+ try {
+ nonNull.withValue(null);
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+ }
+
+ @Test
+ public void testNullableSetting() { // a setting first given null accepts null again
+ Setting<String> nullable = Setting.newKey("nulltest", String.class).withValue(null);
+ assertNull(nullable.withValue(null).getValue());
+ }
+
+ @Test
+ public void testDuplicateIdentifiers() { // Settings.of must reject two settings that share the identifier "foo"
+ try {
+ Setting<String> first = Setting.newKey("foo", String.class).withValue("");
+ Setting<String> second = Setting.newKey("foo", String.class).withValue("");
+
+ Settings.of(first, second);
+
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+ }
+
+ @Test
+ public void testFind() { // find(key) yields the stored setting derived from that key; contains() is true only for the stored setting itself
+ Setting<String> key = Setting.newKey("foo", String.class).withValue("key");
+ Setting<String> element = key.withValue("element"); // derived from key, but with a different value
+
+ Settings settings = Settings.of(element);
+
+ Optional<Setting<String>> actual = settings.find(key);
+
+ assertTrue(actual.isPresent());
+
+ assertSame(element, actual.get());
+
+ assertTrue(settings.contains(element));
+ assertFalse(settings.contains(key));
+ }
+
+ @Test
+ public void testGetPresentSetting() { // get(key) returns the value stored for a setting derived from key
+ Setting<String> key = Setting.newKey("foo", String.class).withValue("key");
+
+ Setting<String> actual = key.withValue("actual");
+ Settings settings = Settings.of(actual);
+
+ assertSame(actual.getValue(), settings.get(key));
+ }
+
+ @Test
+ public void testGetAbsentSetting() { // a setting built from a separate newKey("foo") call is not a match; get(key) returns key's own value
+ Setting<String> key = Setting.newKey("foo", String.class).withValue("key");
+
+ Setting<String> actual = Setting.newKey("foo", String.class).withValue("actual"); // same identifier, different key object
+ Settings settings = Settings.of(actual);
+
+ assertSame(key.getValue(), settings.get(key));
+ }
+
+ @Test
+ public void testGetNullSetting() { // a stored null value is returned as null, not replaced by the value of the lookup setting
+ Setting.Key<String> baseKey = Setting.newKey("foo", String.class);
+
+ Settings settings = Settings.of(baseKey.withValue(null));
+ assertNull(settings.get(baseKey.withValue("not null")));
+ }
+
+ @Test
+ public void testSettingType() { // getValueType() must report the declared value type, including through chains of generic subclasses
+ assertEquals(CharSequence.class, Setting.newKey("foo", CharSequence.class).withValue("").getValueType());
+ assertEquals(CharSequence.class, new Setting.Key<CharSequence>("foo"){}.withValue("").getValueType());
+
+ Type mapType = new Setting.Key<Map<String, Integer>>(
+ "foo"){}.withValue(Collections.emptyMap()).getValueType();
+
+ assertTrue(mapType instanceof ParameterizedType);
+ assertEquals("java.util.Map<java.lang.String, java.lang.Integer>", mapType.getTypeName());
+
+ class Key0<Bar, V> extends Setting.Key<V> { // the value type is the second type parameter here
+ Key0() {
+ super("foo");
+ }
+ }
+
+ class Key2<Baz, V, Bar> extends Key0<V, Bar> { } // reorders the parameters: Key2's third parameter becomes the value type
+
+ class Key3<V> extends Key2<Boolean, Integer, List<Optional<String>>> { } // fixes the value type; Key3's own V is unrelated to it
+
+ class Key4 extends Key3<Boolean> { }
+
+ Type complicatedType = new Key4().withValue(Collections.emptyList()).getValueType();
+
+ assertTrue(complicatedType instanceof ParameterizedType);
+ assertEquals("java.util.List<java.util.Optional<java.lang.String>>", complicatedType.getTypeName());
+
+ class Key3Simple<V> extends Key2<Boolean, Integer, String> { } // same chain, with a plain class as the value type
+
+ class Key4Simple extends Key3Simple<Boolean> { }
+
+ Type simpleType = new Key4Simple().withValue("").getValueType();
+
+ assertEquals(String.class, simpleType);
+ }
+
+
+
+ @Test
+ public void testBadSetting() { // every invalid key construction below must fail with IllegalArgumentException
+ try {
+ new Setting.Key("foo") {}; // raw type: no value type argument
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ Setting.newKey("foo", null); // null value type
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ Setting.newKey(null, Integer.class); // null identifier
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ Setting.newKey(" ", Integer.class); // identifier consisting of a single space
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ Setting.newKey("foo", boolean.class); // primitive value type
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ Setting.newKey("foo", Integer[].class); // array value type
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ new Setting.Key<Integer[]>("foo") {}; // array value type given as a type argument
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ try {
+ new Setting.Key<List<Integer>[]>("foo") {}; // generic array value type
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+
+ class BadKeyCreator {
+ private <V> void badKey() {
+ new Setting.Key<V>("foo") {}; // value type is an unresolved method type variable
+ }
+ }
+
+ try {
+ new BadKeyCreator().badKey();
+ fail();
+ } catch (IllegalArgumentException e) {
+ //test passes; ignore
+ }
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/api/src/test/java/org/apache/any23/writer/TripleFormatTest.java
----------------------------------------------------------------------
diff --git a/api/src/test/java/org/apache/any23/writer/TripleFormatTest.java b/api/src/test/java/org/apache/any23/writer/TripleFormatTest.java
new file mode 100644
index 0000000..f91202c
--- /dev/null
+++ b/api/src/test/java/org/apache/any23/writer/TripleFormatTest.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.writer;
+
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertSame;
+
+public class TripleFormatTest { // checks conversion between RDF4J's RDFFormat and TripleFormat in both directions
+
+ @Test
+ public void testRdf4jRoundTripping() {
+
+ RDFFormat[] formats = {
+ RDFFormat.TRIX, RDFFormat.NQUADS, RDFFormat.RDFA, RDFFormat.TRIG,
+ RDFFormat.N3, RDFFormat.RDFXML, RDFFormat.TURTLE, RDFFormat.JSONLD,
+ RDFFormat.NTRIPLES, RDFFormat.BINARY, RDFFormat.RDFJSON
+ };
+
+ for (RDFFormat expected : formats) {
+ TripleFormat tf = TripleFormat.of(expected);
+
+ RDFFormat actual = tf.toRDFFormat();
+ assertSame(expected, actual); // straight round trip hands back the very same RDFFormat object
+
+ tf.rdfFormat = null; // drop the stored RDFFormat so that toRDFFormat() has to produce a new one (asserted on the next lines)
+ actual = tf.toRDFFormat();
+ assertNotSame(expected, actual);
+
+ assertEquals(expected.getName(), actual.getName()); // the rebuilt format must agree with the original on each property below
+ assertEquals(expected.getStandardURI(), actual.getStandardURI());
+ assertEquals(expected.getCharset(), actual.getCharset());
+ assertEquals(expected.getFileExtensions(), actual.getFileExtensions());
+ assertEquals(expected.supportsContexts(), actual.supportsContexts());
+ assertEquals(expected.supportsNamespaces(), actual.supportsNamespaces());
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/cli/src/main/java/org/apache/any23/cli/Rover.java
----------------------------------------------------------------------
diff --git a/cli/src/main/java/org/apache/any23/cli/Rover.java b/cli/src/main/java/org/apache/any23/cli/Rover.java
index 5b49b39..ef912f7 100644
--- a/cli/src/main/java/org/apache/any23/cli/Rover.java
+++ b/cli/src/main/java/org/apache/any23/cli/Rover.java
@@ -25,13 +25,18 @@ import com.beust.jcommander.converters.FileConverter;
import org.apache.any23.Any23;
import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
+import org.apache.any23.configuration.Setting;
+import org.apache.any23.configuration.Settings;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
import org.apache.any23.filter.IgnoreAccidentalRDFa;
import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.writer.BenchmarkTripleHandler;
+import org.apache.any23.writer.DecoratingWriterFactory;
+import org.apache.any23.writer.TripleWriterFactory;
import org.apache.any23.writer.LoggingTripleHandler;
+import org.apache.any23.writer.NTriplesWriterFactory;
import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
@@ -41,12 +46,16 @@ import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
+import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
+import java.util.ListIterator;
+import java.util.Objects;
import static java.lang.String.format;
@@ -57,15 +66,42 @@ import static java.lang.String.format;
* @author Michele Mostarda (mostarda@fbk.eu)
* @author Richard Cyganiak (richard@cyganiak.de)
* @author Gabriele Renzi
+ * @author Hans Brende (hansbrende@apache.org)
*/
@Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.")
public class Rover extends BaseTool {
- private static final List<String> FORMATS = WriterFactoryRegistry.getInstance().getIdentifiers();
+ private static final Logger logger = LoggerFactory.getLogger(Rover.class);
- private static final int DEFAULT_FORMAT_INDEX = 0;
+ private static final WriterFactoryRegistry registry = WriterFactoryRegistry.getInstance();
+ private static final String DEFAULT_WRITER_IDENTIFIER = NTriplesWriterFactory.IDENTIFIER;
+
+ static {
+ final Setting<Boolean> ALWAYS_SUPPRESS_CSS_TRIPLES = Setting.newKey(
+ "alwayssuppresscsstriples", Boolean.class)
+ .withValue(Boolean.TRUE);
+ final Settings supportedSettings = Settings.of(ALWAYS_SUPPRESS_CSS_TRIPLES);
+
+ registry.register(new DecoratingWriterFactory() {
+
+ @Override
+ public TripleHandler getTripleWriter(TripleHandler delegate, Settings settings) {
+ boolean always = settings.get(ALWAYS_SUPPRESS_CSS_TRIPLES);
+ return new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(delegate), always);
+ }
+
+ @Override
+ public Settings getSupportedSettings() {
+ return supportedSettings;
+ }
+
+ @Override
+ public String getIdentifier() {
+ return "notrivial";
+ }
+ });
+ }
- private static final Logger logger = LoggerFactory.getLogger(Rover.class);
@Parameter(
names = { "-o", "--output" },
@@ -80,8 +116,10 @@ public class Rover extends BaseTool {
@Parameter(names = { "-e", "--extractors" }, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle")
private List<String> extractors = new LinkedList<>();
- @Parameter(names = { "-f", "--format" }, description = "the output format")
- private String format = FORMATS.get(DEFAULT_FORMAT_INDEX);
+    // Starts out holding only the default writer identifier. Built with the LinkedList copy
+    // constructor instead of double-brace initialization, which would create an anonymous
+    // LinkedList subclass carrying a hidden reference to the enclosing Rover instance.
+    @Parameter(names = { "-f", "--format" }, description = "a comma-separated list of writer factories, e.g. notrivial,nquads")
+    private List<String> formats = new LinkedList<>(Collections.singletonList(DEFAULT_WRITER_IDENTIFIER));
@Parameter(
names = { "-l", "--log" },
@@ -93,7 +131,7 @@ public class Rover extends BaseTool {
@Parameter(names = { "-s", "--stats" }, description = "Print out extraction statistics.")
private boolean statistics;
- @Parameter(names = { "-t", "--notrivial" }, description = "Filter trivial statements (e.g. CSS related ones).")
+ @Parameter(names = { "-t", "--notrivial" }, description = "Filter trivial statements (e.g. CSS related ones). [DEPRECATED: As of version 2.3, use --format instead.]")
private boolean noTrivial;
@Parameter(names = { "-p", "--pedantic" }, description = "Validate and fixes HTML content detecting commons issues.")
@@ -127,16 +165,28 @@ public class Rover extends BaseTool {
outputStream = out;
}
+ private static TripleHandler getWriter(String id, OutputStream os) {
+ TripleWriterFactory f = (TripleWriterFactory)registry.getWriterByIdentifier(id);
+ Objects.requireNonNull(f, () -> "Invalid writer id '" + id + "'; admitted values: " + registry.getIdentifiers());
+ return f.getTripleWriter(os, Settings.of()); //TODO parse TripleWriter settings from format list
+ }
+
+ private static TripleHandler getWriter(String id, TripleHandler delegate) {
+ DecoratingWriterFactory f = (DecoratingWriterFactory)registry.getWriterByIdentifier(id);
+ Objects.requireNonNull(f, () -> "Invalid writer id '" + id + "'; admitted values: " + registry.getIdentifiers());
+ return f.getTripleWriter(delegate, Settings.of()); //TODO parse delegate settings from format list
+ }
+
protected void configure() {
- try {
- tripleHandler = WriterFactoryRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream);
- } catch (Exception e) {
- throw new NullPointerException(
- format("Invalid output format '%s', admitted values: %s",
- format,
- FORMATS
- )
- );
+ List<String> formats = this.formats;
+ if (formats.isEmpty()) {
+ formats = Collections.singletonList(DEFAULT_WRITER_IDENTIFIER);
+ }
+ ListIterator<String> l = formats.listIterator(formats.size());
+ tripleHandler = getWriter(l.previous(), outputStream);
+
+ while (l.hasPrevious()) {
+ tripleHandler = getWriter(l.previous(), tripleHandler);
}
if (logFile != null) {
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/cli/src/test/java/org/apache/any23/cli/ExtractorsFlowTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ExtractorsFlowTest.java b/cli/src/test/java/org/apache/any23/cli/ExtractorsFlowTest.java
new file mode 100644
index 0000000..0b75f57
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/ExtractorsFlowTest.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.cli;
+
+import org.apache.any23.cli.flows.PeopleExtractor;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.commons.io.FileUtils;
+import org.eclipse.rdf4j.model.Model;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.model.impl.TreeModel;
+import org.eclipse.rdf4j.rio.Rio;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.stream.Stream;
+
+/**
+ * This is an example for task ANY23-396.
+ *
+ * @author Jacek Grzebyta (jgrzebyta@apache.org)
+ * @author Hans Brende (hansbrende@apache.org)
+ */
+public class ExtractorsFlowTest extends ToolTestBase {
+
+    private static final String testingDatafile = "/org/apache/any23/extractor/csv/test-comma.csv";
+    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    public ExtractorsFlowTest() {
+        super(Rover.class);
+    }
+
+    /**
+     * Emulates action described in ANY23-396: runs the tool with the format list
+     * {@code people,turtle} on a CSV resource and checks that the output file contains
+     * the person statements expected for the three names below.
+     */
+    @Test
+    public void runTestFor396() throws Exception {
+        File outputFile = File.createTempFile("mockdata-", ".ttl", tempDirectory);
+        File logFile = File.createTempFile("log-exec-", ".txt", tempDirectory);
+
+        runTool(String.format("-l %s -o %s -f people,turtle -e csv -d %s %s",
+                logFile.getAbsolutePath(),
+                outputFile.getAbsolutePath(),
+                PeopleExtractor.RAW_NS,
+                copyResourceToTempFile(testingDatafile).getAbsolutePath()));
+
+        // populate expected model
+        Model expected = new TreeModel();
+        Stream.of("Davide Palmisano", "Michele Mostarda", "Giovanni Tummarello")
+                .map(PeopleExtractor::createPerson).forEach(expected::addAll);
+
+        if (log.isDebugEnabled()) {
+            log.debug("\n\nlog file content:\n{}", FileUtils.readFileToString(logFile, "utf-8"));
+            log.debug("\n\nData file: \n{}", FileUtils.readFileToString(outputFile, "utf-8"));
+        }
+
+        Assert.assertTrue(assertCompareModels(expected, outputFile));
+    }
+
+    /**
+     * Compare expected model and received from input File.
+     *
+     * @param expected statements that must all be present
+     * @param received RDF file whose parser format is derived from its file name
+     * @return {@code true} if the parsed file contains every expected statement
+     * @throws AssertionError if no parser format matches the file name
+     */
+    private boolean assertCompareModels(Model expected, File received) throws Exception {
+        Model receivedModel = new TreeModel();
+        // Resolve the format before opening the file, keeping the original evaluation order.
+        RDFFormat format = Rio.getParserFormatForFileName(received.getName()).orElseThrow(AssertionError::new);
+
+        // try-with-resources guarantees the file handle is released even if parsing fails.
+        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(received))) {
+            receivedModel.addAll(Arrays.asList(RDFUtils.parseRDF(format, in, received.toURI().toString())));
+        }
+
+        return receivedModel.containsAll(expected);
+    }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/cli/src/test/java/org/apache/any23/cli/RoverTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/RoverTest.java b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
index 15054e4..c2b7a86 100644
--- a/cli/src/test/java/org/apache/any23/cli/RoverTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
@@ -90,6 +90,45 @@ public class RoverTest extends ToolTestBase {
Assert.assertEquals(0, graphCounter);
}
+    /**
+     * Runs the tool twice on the same HTML resource, first with the format list
+     * {@code nquads} and then with {@code notrivial,nquads}, and checks that the second
+     * output no longer mentions the stylesheet IRI and is exactly one line shorter.
+     */
+    @Test
+    public void testDelegatingWriterFactory() throws Exception {
+        final String defaultGraph = "http://test/default/ns";
+        final String stylesheetIri = "http://www.w3.org/1999/xhtml/vocab#stylesheet";
+        final File target = File.createTempFile("rover-test", "out", tempDirectory);
+
+        // First run: plain nquads writer.
+        final String plainArgs = String.format(
+                "-o %s -f nquads %s -d %s",
+                target.getAbsolutePath(),
+                copyResourceToTempFile("/cli/basic-with-stylesheet.html").getAbsolutePath(),
+                defaultGraph
+        );
+        Assert.assertEquals("Unexpected exit code.", 0, runTool(plainArgs));
+
+        final String plainOutput = FileUtils.readFileContent(target);
+        Assert.assertTrue(plainOutput.contains(stylesheetIri));
+        final int lineCountWithStylesheet = plainOutput.split("\\n").length;
+
+        // Second run: same input, nquads writer decorated by "notrivial".
+        final String filteredArgs = String.format(
+                "-o %s -f notrivial,nquads %s -d %s",
+                target.getAbsolutePath(),
+                copyResourceToTempFile("/cli/basic-with-stylesheet.html").getAbsolutePath(),
+                defaultGraph
+        );
+        Assert.assertEquals("Unexpected exit code.", 0, runTool(filteredArgs));
+
+        final String filteredOutput = FileUtils.readFileContent(target);
+        Assert.assertFalse(filteredOutput.contains(stylesheetIri));
+        final int lineCountWithoutStylesheet = filteredOutput.split("\\n").length;
+
+        Assert.assertEquals(lineCountWithStylesheet - 1, lineCountWithoutStylesheet);
+    }
+
+
/* BEGIN: online tests. */
@Test
http://git-wip-us.apache.org/repos/asf/any23/blob/692c583f/cli/src/test/java/org/apache/any23/cli/flows/PeopleExtractor.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/flows/PeopleExtractor.java b/cli/src/test/java/org/apache/any23/cli/flows/PeopleExtractor.java
new file mode 100644
index 0000000..d1f31c0
--- /dev/null
+++ b/cli/src/test/java/org/apache/any23/cli/flows/PeopleExtractor.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.cli.flows;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.vocab.CSV;
+import org.apache.any23.writer.CompositeTripleHandler;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.Model;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.model.impl.TreeModel;
+import org.eclipse.rdf4j.model.util.Models;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Proof of concept for ANY23-396 example.
+ */
+public class PeopleExtractor extends CompositeTripleHandler {
+
+    // One shared, immutable logger for the class instead of a mutable per-instance field.
+    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    private static final CSV csv = CSV.getInstance();
+    private static final ValueFactory vf = SimpleValueFactory.getInstance();
+    public static final String RAW_NS = "urn:dataser:raw/";
+    private static final IRI RAW_FIRST_NAME = vf.createIRI(RAW_NS, "FirstName");
+    private static final IRI RAW_LAST_NAME = vf.createIRI(RAW_NS, "LastName");
+
+    private static final String NAMESPACE = "http://supercustom.net/ontology/";
+    private static final IRI PERSON = vf.createIRI(NAMESPACE, "Person");
+    private static final IRI FULL_NAME = vf.createIRI(NAMESPACE, "fullName");
+    private static final IRI HASH = vf.createIRI(NAMESPACE, "hash");
+
+    /**
+     * Builds the three statements describing one person: its type, its full name and its hash.
+     * The subject IRI's local name is the SHA-1 hex digest of the full name.
+     *
+     * @param fullName the person's full name
+     * @return a new model holding the person's statements
+     */
+    public static Model createPerson(String fullName) {
+        IRI s = vf.createIRI("http://rdf.supercustom.net/data/", DigestUtils.sha1Hex(fullName));
+        Model model = new TreeModel();
+        model.add(s, RDF.TYPE, PERSON);
+        model.add(s, FULL_NAME, vf.createLiteral(fullName));
+        model.add(s, HASH, vf.createLiteral(s.getLocalName(), XMLSchema.HEXBINARY));
+        return model;
+    }
+
+    // Buffer for triples received from the "csv" extractor until the context is closed.
+    private final Model csvModel = new TreeModel();
+
+    /**
+     * @param delegate the single handler that receives everything this handler forwards
+     */
+    public PeopleExtractor(TripleHandler delegate) {
+        super(Collections.singletonList(delegate));
+    }
+
+    /**
+     * Buffers triples whose context names the "csv" extractor, using the context's unique ID
+     * as graph; every other triple is forwarded unchanged.
+     */
+    @Override
+    public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context) throws TripleHandlerException {
+        if ("csv".equals(context.getExtractorName())) {
+            csvModel.add(s, p, o, vf.createIRI(context.getUniqueID()));
+        } else {
+            super.receiveTriple(s, p, o, g, context);
+        }
+    }
+
+    /**
+     * Turns every buffered CSV row into person statements, forwards them with a {@code null}
+     * graph, clears the buffer and then closes the context on the superclass.
+     */
+    @Override
+    public void closeContext(ExtractionContext context) throws TripleHandlerException {
+        // Every subject typed as a CSV row identifies one row of the buffered data.
+        Set<Resource> subjects = csvModel.filter(null, RDF.TYPE, csv.rowType)
+                .stream().map(Statement::getSubject).collect(Collectors.toSet());
+
+        log.debug("List of rows: {}", subjects);
+
+        for (Resource rowId : subjects) {
+            // A missing name part falls back to the empty string.
+            String firstName = Models.objectLiteral(csvModel.filter(rowId, RAW_FIRST_NAME, null))
+                    .map(Literal::getLabel).orElse("");
+
+            String lastName = Models.objectLiteral(csvModel.filter(rowId, RAW_LAST_NAME, null))
+                    .map(Literal::getLabel).orElse("");
+
+            String fullName = firstName + " " + lastName;
+
+            for (Statement s : createPerson(fullName)) {
+                super.receiveTriple(s.getSubject(), s.getPredicate(), s.getObject(), null, context);
+            }
+        }
+
+        csvModel.clear();
+
+        super.closeContext(context);
+    }
+
+}
\ No newline at end of file