You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by mo...@apache.org on 2012/05/20 18:12:45 UTC
svn commit: r1340761 - in
/incubator/any23/trunk/core/src/main/java/org/apache/any23: Any23.java
cli/MimeDetector.java http/DefaultHTTPClient.java
http/DefaultHTTPClientConfiguration.java io/nquads/NQuadsParser.java
Author: mostarda
Date: Sun May 20 16:12:45 2012
New Revision: 1340761
URL: http://svn.apache.org/viewvc?rev=1340761&view=rev
Log:
Added an explicit DefaultHTTPClientConfiguration class and replaced the previous anonymous class usages with it. This is a minor code refactoring.
Added:
incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java
Modified:
incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/Any23.java Sun May 20 16:12:45 2012
@@ -28,8 +28,8 @@ import org.apache.any23.extractor.Single
import org.apache.any23.extractor.SingleDocumentExtractionReport;
import org.apache.any23.http.AcceptHeaderBuilder;
import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.http.DefaultHTTPClientConfiguration;
import org.apache.any23.http.HTTPClient;
-import org.apache.any23.http.HTTPClientConfiguration;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.mime.TikaMIMETypeDetector;
@@ -217,20 +217,7 @@ public class Any23 {
throw new IOException("Must call " + Any23.class.getSimpleName() +
".setHTTPUserAgent(String) before extracting from HTTP URI");
}
- httpClient.init( new HTTPClientConfiguration() {
- public String getUserAgent() {
- return userAgent;
- }
- public String getAcceptHeader() {
- return Any23.this.getAcceptHeader();
- }
- public int getDefaultTimeout() {
- return configuration.getPropertyIntOrFail("any23.http.client.timeout");
- }
- public int getMaxConnections() {
- return configuration.getPropertyIntOrFail("any23.http.client.max.connections");
- }
- } );
+ httpClient.init( new DefaultHTTPClientConfiguration(this.getAcceptHeader()) );
httpClientInitialized = true;
}
return httpClient;
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java Sun May 20 16:12:45 2012
@@ -20,10 +20,9 @@ package org.apache.any23.cli;
import com.beust.jcommander.IStringConverter;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
-import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.http.DefaultHTTPClient;
+import org.apache.any23.http.DefaultHTTPClientConfiguration;
import org.apache.any23.http.HTTPClient;
-import org.apache.any23.http.HTTPClientConfiguration;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.mime.TikaMIMETypeDetector;
@@ -89,21 +88,7 @@ public class MimeDetector implements Too
}
if (document.matches(URL_DOCUMENT_RE)) {
final HTTPClient client = new DefaultHTTPClient();
- // TODO: anonymous config class also used in Any23. centralize.
- client.init(new HTTPClientConfiguration() {
- public String getUserAgent() {
- return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");
- }
- public String getAcceptHeader() {
- return "";
- }
- public int getDefaultTimeout() {
- return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout");
- }
- public int getMaxConnections() {
- return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections");
- }
- });
+ client.init( DefaultHTTPClientConfiguration.singleton() );
try {
return new HTTPDocumentSource(client, document);
} catch ( URISyntaxException e ) {
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClient.java Sun May 20 16:12:45 2012
@@ -17,7 +17,6 @@
package org.apache.any23.http;
-import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
@@ -57,38 +56,13 @@ public class DefaultHTTPClient implement
private String contentType = null;
/**
- * Creates a default {@link HTTPClientConfiguration} instance.
- *
- * @return a deault configuration.
- */
- public static HTTPClientConfiguration createDefaultConfiguration() {
- return new HTTPClientConfiguration() {
- public String getUserAgent() {
- return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");
- }
-
- public String getAcceptHeader() {
- return null;
- }
-
- public int getDefaultTimeout() {
- return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout");
- }
-
- public int getMaxConnections() {
- return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections");
- }
- };
- }
-
- /**
* Creates a {@link DefaultHTTPClient} instance already initialized
*
* @return
*/
public static DefaultHTTPClient createInitializedHTTPClient() {
final DefaultHTTPClient defaultHTTPClient = new DefaultHTTPClient();
- defaultHTTPClient.init( createDefaultConfiguration() );
+ defaultHTTPClient.init( DefaultHTTPClientConfiguration.singleton() );
return defaultHTTPClient;
}
Added: incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java?rev=1340761&view=auto
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java (added)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/http/DefaultHTTPClientConfiguration.java Sun May 20 16:12:45 2012
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.http;
+
+import org.apache.any23.configuration.DefaultConfiguration;
+
+/**
+ * Default implementation of {@link HTTPClientConfiguration}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class DefaultHTTPClientConfiguration implements HTTPClientConfiguration {
+
+ private static DefaultHTTPClientConfiguration instance;
+
+ public static DefaultHTTPClientConfiguration singleton() {
+ if(instance == null) {
+ instance = new DefaultHTTPClientConfiguration();
+ }
+ return instance;
+ }
+
+ private String userAgent;
+ private int defaultTimeout;
+ private int maxConnections;
+ private String acceptHeader;
+
+ /**
+ * Constructor.
+ *
+ * @param userAgent the user agent descriptor string.
+ * @param defaultTimeout the default timeout, cannot be <code>&lt;= 0</code>.
+ * @param maxConnections the default max connections, cannot be <code>&lt;= 0</code>.
+ * @param acceptHeader the accept header string, can be <code>null</code>.
+ */
+ public DefaultHTTPClientConfiguration(
+ String userAgent, int defaultTimeout, int maxConnections, String acceptHeader
+ ) {
+ if(userAgent == null) throw new IllegalArgumentException("userAgent cannot be null.");
+ if(defaultTimeout <= 0) throw new IllegalArgumentException("defaultTimeout cannot be <= 0 .");
+ if(maxConnections <= 0) throw new IllegalArgumentException("maxConnections cannot be <= 0 .");
+ this.userAgent = userAgent;
+ this.defaultTimeout = defaultTimeout;
+ this.maxConnections = maxConnections;
+ this.acceptHeader = acceptHeader;
+ }
+
+ /**
+ * Constructor.
+ * initialized with default {@link DefaultConfiguration} parameters
+ *
+ * @param acceptHeader the value to initialize <code>acceptHeader</code>.
+ */
+ public DefaultHTTPClientConfiguration(String acceptHeader) {
+ this(
+ DefaultConfiguration.singleton().getPropertyOrFail ("any23.http.user.agent.default"),
+ DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout"),
+ DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections"),
+ acceptHeader
+ );
+ }
+
+ /**
+ * Constructor.
+ * initialized with default {@link DefaultConfiguration} parameters and <code>acceptHeader=null</code>.
+ */
+ public DefaultHTTPClientConfiguration() {
+ this(null);
+ }
+
+ public String getUserAgent() {
+ return userAgent;
+ }
+
+ public int getDefaultTimeout() {
+ return defaultTimeout;
+ }
+
+ public int getMaxConnections() {
+ return maxConnections;
+ }
+
+ public String getAcceptHeader() {
+ return acceptHeader;
+ }
+
+}
\ No newline at end of file
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java?rev=1340761&r1=1340760&r2=1340761&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/io/nquads/NQuadsParser.java Sun May 20 16:12:45 2012
@@ -470,7 +470,7 @@ public class NQuadsParser extends RDFPar
* @return the literal attribute.
* @throws IOException
*/
- private LiteralAttribute parseLiteralAttribute(BufferedReader br) throws IOException {
+ private LiteralAttribute parseLiteralAttribute(BufferedReader br) throws IOException, RDFParseException {
char c = readChar(br);
if(c != '^' && c != '@') {
reset(br);
@@ -483,29 +483,47 @@ public class NQuadsParser extends RDFPar
assertChar(br, '^');
}
- // Consuming eventual open URI.
- mark(br);
- c = readChar(br);
- if(c != '<') {
+ final String attribute;
+ if (isLang) {
+ StringBuilder sb = new StringBuilder();
+ while (true) {
+ c = readChar(br);
+ if (c != ' ' && c != '<') {
+ mark(br);
+ sb.append(c);
+ } else {
+ break;
+ }
+ }
reset(br);
+ attribute = sb.toString();
+ } else {
+ attribute = parseURI(br).toString();
}
- StringBuilder sb = new StringBuilder();
- while(true) {
- c = readChar(br);
- if(c == '>') {
- mark(br);
- continue;
- }
- if(c != ' ' && c != '<') {
- mark(br);
- sb.append(c);
- } else {
- break;
- }
- }
- reset(br);
- return new LiteralAttribute( isLang, sb.toString() );
+// // Consuming eventual open URI.
+// mark(br);
+// c = readChar(br);
+// if(c != '<') {
+// reset(br);
+// }
+//
+// StringBuilder sb = new StringBuilder();
+// while(true) {
+// c = readChar(br);
+// if(c == '>') {
+// mark(br);
+// continue;
+// }
+// if(c != ' ' && c != '<') {
+// mark(br);
+// sb.append(c);
+// } else {
+// break;
+// }
+// }
+// reset(br);
+ return new LiteralAttribute( isLang, attribute);
}
/**