You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by th...@apache.org on 2007/02/21 02:17:53 UTC

svn commit: r509854 - /labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java

Author: thorsten
Date: Tue Feb 20 17:17:53 2007
New Revision: 509854

URL: http://svn.apache.org/viewvc?view=rev&rev=509854
Log:
Allow the crawler to identify itself in the protocol.

Modified:
    labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java

Modified: labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java
URL: http://svn.apache.org/viewvc/labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java?view=diff&rev=509854&r1=509853&r2=509854
==============================================================================
--- labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java (original)
+++ labs/droids/src/plugins/protocol-http/src/java/org/apache/droids/protocol/http/Http.java Tue Feb 20 17:17:53 2007
@@ -26,36 +26,62 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.droids.api.Protocol;
 import org.apache.droids.conf.Configuration;
+import org.apache.droids.protocol.MediaType;
 
 public class Http implements Protocol {
     public static final Log LOG = LogFactory.getLog(Http.class);
+
     private Configuration conf;
+
+    private String userAgent;
+
+    private int timeout;
+
+    private String from;
+
+    private String refer;
+
     public Http() {
-      }
+    }
+
     public String getContentType(String url) {
         URL source;
         HttpURLConnection urlc = null;
         try {
             source = new URL(url);
-            urlc = (HttpURLConnection) source.openConnection();
-            return urlc.getContentType();
+            urlc = prepareConnection(source);
+            final String contentType = urlc.getContentType();
+            urlc.disconnect();
+            String [] types = contentType.split(";");
+            for (int i = 0; i < types.length; i++) {
+                if (MediaType.isContentType(types[i]))
+                    return types[i];
+            }
+            return contentType;
         } catch (MalformedURLException e) {
             // TODO Auto-generated catch block
             e.printStackTrace();
         } catch (IOException e) {
             // TODO Auto-generated catch block
             e.printStackTrace();
-        }finally{
-            if(urlc!=null)urlc.disconnect();
+        } finally {
+            if (urlc != null)
+                urlc.disconnect();
         }
         return null;
     }
 
     public InputStream openStream(String url) throws IOException {
         URL source;
+        HttpURLConnection urlc = null;
         try {
             source = new URL(url);
-            return source.openStream();
+            urlc = prepareConnection(source);
+            // that returns 400 errors
+            //final InputStream openStream = urlc.getInputStream();
+            final InputStream openStream =source.openStream();
+            urlc.disconnect();
+            return openStream;
         } catch (MalformedURLException e) {
             // TODO Auto-generated catch block
             e.printStackTrace();
@@ -65,11 +91,67 @@
         }
         return null;
     }
+
+    /**
+     * Opens a connection to the given URL and applies the crawler
+     * identification headers and the configured timeout.
+     *
+     * @param source the URL to connect to; its connection is cast to
+     *        HttpURLConnection
+     * @return the prepared connection
+     * @throws IOException if the connection cannot be opened
+     */
+    private HttpURLConnection prepareConnection(URL source) throws IOException {
+        HttpURLConnection urlc = (HttpURLConnection) source.openConnection();
+        // Any of these values may be null (for example when no agent string
+        // could be built); skip them rather than hand a null header value
+        // to the connection.
+        if (userAgent != null) {
+            urlc.setRequestProperty("User-Agent", userAgent);
+        }
+        if (from != null) {
+            urlc.setRequestProperty("From", from);
+        }
+        if (refer != null) {
+            urlc.setRequestProperty("Referer", refer);
+        }
+        urlc.setConnectTimeout(timeout);
+        // Bound reads as well, so a server that stops responding cannot
+        // block the caller indefinitely.
+        urlc.setReadTimeout(timeout);
+        return urlc;
+    }
+
     public Configuration getConf() {
         return conf;
     }
 
     public void setConf(Configuration conf) {
-        this.conf=conf;
+        this.conf = conf; // keep the configuration, then cache the crawler identification settings read from it
+        this.from=conf.get("http.agent.email"); // contact address; prepareConnection sends it as the "From" header
+        this.refer=conf.get("http.agent.url"); // crawler URL; prepareConnection sends it as the "Referer" header
+        this.userAgent = getAgentString(conf.get("http.agent.name"), conf
+                .get("http.agent.version"), conf.get("http.agent.description"),
+                conf.get("http.agent.url"), from); // "User-Agent" value assembled from the http.agent.* settings
+        this.timeout = conf.getInt("http.timeout", 10000); // handed to setConnectTimeout, i.e. milliseconds; 10000 is presumably the default when the key is unset
+    }
+
+    /**
+     * Builds the User-Agent value in the form
+     * <code>name[/version] (description; url; email)</code>.
+     * The parenthesised part lists only the non-empty details and is left
+     * out entirely when there are none.
+     *
+     * @param agentName crawler name; required
+     * @param agentVersion crawler version, appended after "/" unless null
+     * @param agentDesc short description, may be null or empty
+     * @param agentURL URL with information about the crawler, may be null or empty
+     * @param agentEmail contact e-mail address, may be null or empty
+     * @return the agent string, or null when agentName is null or blank
+     */
+    private static String getAgentString(String agentName, String agentVersion,
+            String agentDesc, String agentURL, String agentEmail) {
+        // No name means there is nothing to identify the crawler with.
+        if ((agentName == null) || (agentName.trim().length() == 0)) {
+            return null;
+        }
+
+        StringBuffer buf = new StringBuffer(agentName);
+        if (agentVersion != null) {
+            buf.append("/");
+            buf.append(agentVersion);
+        }
+
+        // The separator is chosen per detail actually written, so an empty
+        // URL or e-mail never leaves a dangling "; " behind.
+        String[] details = { agentDesc, agentURL, agentEmail };
+        int detailsStart = buf.length();
+        for (int i = 0; i < details.length; i++) {
+            if ((details[i] != null) && (details[i].length() != 0)) {
+                buf.append(buf.length() == detailsStart ? " (" : "; ");
+                buf.append(details[i]);
+            }
+        }
+        if (buf.length() != detailsStart) {
+            // At least one detail was written: close the parenthesis.
+            buf.append(")");
+        }
+        return buf.toString();
     }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org