You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by mo...@apache.org on 2012/05/20 18:12:45 UTC

svn commit: r1340761 - in /incubator/any23/trunk/core/src/main/java/org/apache/any23: Any23.java cli/MimeDetector.java http/DefaultHTTPClient.java http/DefaultHTTPClientConfiguration.java io/nquads/NQuadsParser.java

Author: mostarda
Date: Sun May 20 16:12:45 2012
New Revision: 1340761

URL: http://svn.apache.org/viewvc?rev=1340761&view=rev
Log:
Added an explicit DefaultHTTPClientConfiguration class and used it to replace the previous anonymous HTTPClientConfiguration implementations. This is a minor code refactoring.

Added:
    incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java
Modified:
    incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
    incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java Sun May 20 16:12:45 2012
@@ -28,8 +28,8 @@ import org.apache.any23.extractor.Single
 import org.apache.any23.extractor.SingleDocumentExtractionReport;
 import org.apache.any23.http.AcceptHeaderBuilder;
 import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.http.DefaultHTTPClientConfiguration;
 import org.apache.any23.http.HTTPClient;
-import org.apache.any23.http.HTTPClientConfiguration;
 import org.apache.any23.mime.MIMEType;
 import org.apache.any23.mime.MIMETypeDetector;
 import org.apache.any23.mime.TikaMIMETypeDetector;
@@ -217,20 +217,7 @@ public class Any23 {
                 throw new IOException("Must call " + Any23.class.getSimpleName() +
                         ".setHTTPUserAgent(String) before extracting from HTTP URI");
             }
-            httpClient.init( new HTTPClientConfiguration() {
-                public String getUserAgent() {
-                    return userAgent;
-                }
-                public String getAcceptHeader() {
-                    return Any23.this.getAcceptHeader();
-                }
-                public int getDefaultTimeout() {
-                    return configuration.getPropertyIntOrFail("any23.http.client.timeout");
-                }
-                public int getMaxConnections() {
-                    return configuration.getPropertyIntOrFail("any23.http.client.max.connections");
-                }
-            } );
+            httpClient.init( new DefaultHTTPClientConfiguration(this.getAcceptHeader()) );
             httpClientInitialized = true;
         }
         return httpClient;

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java Sun May 20 16:12:45 2012
@@ -20,10 +20,9 @@ package org.apache.any23.cli;
 import com.beust.jcommander.IStringConverter;
 import com.beust.jcommander.Parameter;
 import com.beust.jcommander.Parameters;
-import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.http.DefaultHTTPClientConfiguration;
 import org.apache.any23.http.HTTPClient;
-import org.apache.any23.http.HTTPClientConfiguration;
 import org.apache.any23.mime.MIMEType;
 import org.apache.any23.mime.MIMETypeDetector;
 import org.apache.any23.mime.TikaMIMETypeDetector;
@@ -89,21 +88,7 @@ public class MimeDetector implements Too
             }
             if (document.matches(URL_DOCUMENT_RE)) {
                 final HTTPClient client = new DefaultHTTPClient();
-                // TODO: anonymous config class also used in Any23. centralize.
-                client.init(new HTTPClientConfiguration() {
-                    public String getUserAgent() {
-                        return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");
-                    }
-                    public String getAcceptHeader() {
-                        return "";
-                    }
-                    public int getDefaultTimeout() {
-                        return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout");
-                    }
-                    public int getMaxConnections() {
-                        return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections");
-                    }
-                });
+                client.init( DefaultHTTPClientConfiguration.singleton() );
                 try {
                     return new HTTPDocumentSource(client, document);
                 } catch ( URISyntaxException e ) {

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java Sun May 20 16:12:45 2012
@@ -17,7 +17,6 @@
 
 package org.apache.any23.http;
 
-import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.commons.httpclient.Header;
 import org.apache.commons.httpclient.HostConfiguration;
 import org.apache.commons.httpclient.HttpClient;
@@ -57,38 +56,13 @@ public class DefaultHTTPClient implement
     private String contentType = null;
 
     /**
-     * Creates a default {@link HTTPClientConfiguration} instance.
-     *
-     * @return a deault configuration.
-     */
-    public static HTTPClientConfiguration createDefaultConfiguration() {
-        return new HTTPClientConfiguration() {
-            public String getUserAgent() {
-                return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");
-            }
-
-            public String getAcceptHeader() {
-                return null;
-            }
-
-            public int getDefaultTimeout() {
-                return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout");
-            }
-
-            public int getMaxConnections() {
-                return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections");
-            }
-        };
-    }
-
-    /**
      * Creates a {@link DefaultHTTPClient} instance already initialized
      *
      * @return
      */
     public static DefaultHTTPClient createInitializedHTTPClient() {
         final DefaultHTTPClient defaultHTTPClient = new DefaultHTTPClient();
-        defaultHTTPClient.init( createDefaultConfiguration() );
+        defaultHTTPClient.init( DefaultHTTPClientConfiguration.singleton() );
         return defaultHTTPClient;
     }
 

Added: incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java?rev=1340761&view=auto
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java (added)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java Sun May 20 16:12:45 2012
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.http;
+
+import org.apache.any23.configuration.DefaultConfiguration;
+
+/**
+ * Default implementation of {@link HTTPClientConfiguration}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class DefaultHTTPClientConfiguration implements HTTPClientConfiguration {
+
+    private static DefaultHTTPClientConfiguration instance;
+
+    public static DefaultHTTPClientConfiguration singleton() {
+        if(instance == null) {
+            instance = new DefaultHTTPClientConfiguration();
+        }
+        return instance;
+    }
+
+    private String userAgent;
+    private int    defaultTimeout;
+    private int    maxConnections;
+    private String acceptHeader;
+
+    /**
+     * Constructor.
+     *
+     * @param userAgent the user agent descriptor string.
+     * @param defaultTimeout the default timeout, cannot be <code>&lt;= 0</code>.
+     * @param maxConnections the default max connections, cannot be <code>&lt;= 0</code>.
+     * @param acceptHeader the accept header string, can be <code>null</code>.
+     */
+    public DefaultHTTPClientConfiguration(
+            String userAgent, int defaultTimeout, int maxConnections, String acceptHeader
+    ) {
+        if(userAgent == null)   throw new IllegalArgumentException("userAgent cannot be null.");
+        if(defaultTimeout <= 0) throw new IllegalArgumentException("defaultTimeout cannot be <= 0 .");
+        if(maxConnections <= 0) throw new IllegalArgumentException("maxConnections cannot be <= 0 .");
+        this.userAgent      = userAgent;
+        this.defaultTimeout = defaultTimeout;
+        this.maxConnections = maxConnections;
+        this.acceptHeader   = acceptHeader;
+    }
+
+    /**
+     * Constructor.
+     * initialized with default {@link DefaultConfiguration} parameters
+     *
+     * @param acceptHeader the value to initialize <code>acceptHeader</code>.
+     */
+    public DefaultHTTPClientConfiguration(String acceptHeader) {
+        this(
+                DefaultConfiguration.singleton().getPropertyOrFail   ("any23.http.user.agent.default"),
+                DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout"),
+                DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections"),
+                acceptHeader
+        );
+    }
+
+    /**
+     * Constructor.
+     * initialized with default {@link DefaultConfiguration} parameters and <code>acceptHeader=null</code>.
+     */
+    public DefaultHTTPClientConfiguration() {
+        this(null);
+    }
+
+    public String getUserAgent() {
+        return userAgent;
+    }
+
+    public int getDefaultTimeout() {
+        return defaultTimeout;
+    }
+
+    public int getMaxConnections() {
+        return maxConnections;
+    }
+
+    public String getAcceptHeader() {
+        return acceptHeader;
+    }
+
+}
\ No newline at end of file

Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java Sun May 20 16:12:45 2012
@@ -470,7 +470,7 @@ public class NQuadsParser extends RDFPar
      * @return the literal attribute.
      * @throws IOException
      */
-    private LiteralAttribute parseLiteralAttribute(BufferedReader br) throws IOException {
+    private LiteralAttribute parseLiteralAttribute(BufferedReader br) throws IOException, RDFParseException {
         char c = readChar(br);
         if(c != '^' && c != '@') {
             reset(br);
@@ -483,29 +483,47 @@ public class NQuadsParser extends RDFPar
             assertChar(br, '^');
         }
 
-        // Consuming eventual open URI.
-        mark(br);
-        c = readChar(br);
-        if(c != '<') {
+        final String attribute;
+        if (isLang) {
+            StringBuilder sb = new StringBuilder();
+            while (true) {
+                c = readChar(br);
+                if (c != ' ' && c != '<') {
+                    mark(br);
+                    sb.append(c);
+                } else {
+                    break;
+                }
+            }
             reset(br);
+            attribute = sb.toString();
+        } else {
+            attribute = parseURI(br).toString();
         }
 
-        StringBuilder sb = new StringBuilder();
-        while(true) {
-            c = readChar(br);
-            if(c == '>') {
-                mark(br);
-                continue;
-            }
-            if(c != ' ' && c != '<') {
-                mark(br);
-                sb.append(c);
-            } else {
-                break;
-            }
-        }
-        reset(br);
-        return new LiteralAttribute( isLang, sb.toString() );
+//        // Consuming eventual open URI.
+//        mark(br);
+//        c = readChar(br);
+//        if(c != '<') {
+//            reset(br);
+//        }
+//
+//        StringBuilder sb = new StringBuilder();
+//        while(true) {
+//            c = readChar(br);
+//            if(c == '>') {
+//                mark(br);
+//                continue;
+//            }
+//            if(c != ' ' && c != '<') {
+//                mark(br);
+//                sb.append(c);
+//            } else {
+//                break;
+//            }
+//        }
+//        reset(br);
+        return new LiteralAttribute( isLang, attribute);
     }
 
     /**