You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by th...@apache.org on 2007/02/21 02:17:53 UTC
svn commit: r509854 -
/labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java
Author: thorsten
Date: Tue Feb 20 17:17:53 2007
New Revision: 509854
URL: http://svn.apache.org/viewvc?view=rev&rev=509854
Log:
Allow the crawler to identify itself in its HTTP requests (User-Agent, From and Referer headers).
Modified:
labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java
Modified: labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java
URL: http://svn.apache.org/viewvc/labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java?view=diff&rev=509854&r1=509853&r2=509854
==============================================================================
--- labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java (original)
+++ labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java Tue Feb 20 17:17:53 2007
@@ -26,36 +26,62 @@
import org.apache.commons.logging.LogFactory;
import org.apache.droids.api.Protocol;
import org.apache.droids.conf.Configuration;
+import org.apache.droids.protocol.MediaType;
public class Http implements Protocol {
public static final Log LOG = LogFactory.getLog(Http.class);
+
private Configuration conf;
+
+ private String userAgent;
+
+ private int timeout;
+
+ private String from;
+
+ private String refer;
+
public Http() { // no-arg constructor with an empty body; the fields are assigned in setConf(Configuration)
- }
+ }
+
public String getContentType(String url) {
URL source;
HttpURLConnection urlc = null;
try {
source = new URL(url);
- urlc = (HttpURLConnection) source.openConnection();
- return urlc.getContentType();
+ urlc = prepareConnection(source);
+ final String contentType = urlc.getContentType();
+ urlc.disconnect();
+ String [] types = contentType.split(";");
+ for (int i = 0; i < types.length; i++) {
+ if (MediaType.isContentType(types[i]))
+ return types[i];
+ }
+ return contentType;
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
- }finally{
- if(urlc!=null)urlc.disconnect();
+ } finally {
+ if (urlc != null)
+ urlc.disconnect();
}
return null;
}
public InputStream openStream(String url) throws IOException {
URL source;
+ HttpURLConnection urlc = null;
try {
source = new URL(url);
- return source.openStream();
+ urlc = prepareConnection(source);
+ // that returns 400 errors
+ //final InputStream openStream = urlc.getInputStream();
+ final InputStream openStream =source.openStream();
+ urlc.disconnect();
+ return openStream;
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
@@ -65,11 +91,67 @@
}
return null;
}
+
+ private HttpURLConnection prepareConnection(URL source) throws IOException {
+ HttpURLConnection urlc;
+ urlc = (HttpURLConnection) source.openConnection();
+ urlc.setRequestProperty("User-Agent", userAgent);
+ urlc.addRequestProperty("From", from);
+ urlc.setRequestProperty("Referer", refer);
+ urlc.setConnectTimeout(timeout);
+ return urlc;
+ }
+
public Configuration getConf() {
return conf;
}
public void setConf(Configuration conf) {
- this.conf=conf;
+ this.conf = conf;
+ this.from=conf.get("http.agent.email");
+ this.refer=conf.get("http.agent.url");
+ this.userAgent = getAgentString(conf.get("http.agent.name"), conf
+ .get("http.agent.version"), conf.get("http.agent.description"),
+ conf.get("http.agent.url"), from);
+ this.timeout = conf.getInt("http.timeout", 10000);
+ }
+
+ /**
+ * Builds the User-Agent header value in the form
+ * "name/version (description; url; email)". The version and each
+ * parenthesised detail are left out when null or empty, and the
+ * parentheses are dropped when no detail remains.
+ *
+ * @param agentName crawler name; required
+ * @param agentVersion crawler version, may be null or empty
+ * @param agentDesc short description, may be null or empty
+ * @param agentURL URL describing the crawler, may be null or empty
+ * @param agentEmail contact address, may be null or empty
+ * @return the assembled agent string, or null when agentName is null or blank
+ */
+ private static String getAgentString(String agentName, String agentVersion,
+ String agentDesc, String agentURL, String agentEmail) {
+
+ // Without a name there is nothing to announce. (The test used to be
+ // inverted: it built a string only for a missing name and returned null
+ // for every configured one.)
+ if ((agentName == null) || (agentName.trim().length() == 0)) {
+ return null;
+ }
+
+ StringBuilder buf = new StringBuilder(agentName.trim());
+ if ((agentVersion != null) && (agentVersion.length() != 0)) {
+ buf.append("/");
+ buf.append(agentVersion);
+ }
+
+ // Collect the non-empty details first so that "; " only ever appears
+ // between two values, never trailing after the last one.
+ StringBuilder details = new StringBuilder();
+ String[] parts = { agentDesc, agentURL, agentEmail };
+ for (int i = 0; i < parts.length; i++) {
+ if ((parts[i] != null) && (parts[i].length() != 0)) {
+ if (details.length() > 0) {
+ details.append("; ");
+ }
+ details.append(parts[i]);
+ }
+ }
+ if (details.length() > 0) {
+ buf.append(" (");
+ buf.append(details.toString());
+ buf.append(")");
+ }
+ return buf.toString();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org