You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by bf...@apache.org on 2011/06/03 01:26:29 UTC

svn commit: r1130871 - in /oodt/branches/protocol/protocol-http/src: main/java/org/apache/oodt/cas/protocol/http/ main/java/org/apache/oodt/cas/protocol/http/util/ test/org/apache/oodt/cas/protocol/http/util/

Author: bfoster
Date: Thu Jun  2 23:26:28 2011
New Revision: 1130871

URL: http://svn.apache.org/viewvc?rev=1130871&view=rev
Log:

- refactoring to better support unit-testing

-----------------
OODT-194

Modified:
    oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
    oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
    oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java

Modified: oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
URL: http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java (original)
+++ oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java Thu Jun  2 23:26:28 2011
@@ -42,9 +42,11 @@ import java.io.OutputStream;
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import java.util.Scanner;
 import java.util.StringTokenizer;
 import java.util.regex.Matcher;
@@ -67,7 +69,7 @@ public class HttpProtocol implements Pro
 
   static String IGNORE = "ignore";
 
-  static HashMap<String, LinkedList<ProtocolFile>> linkChildren = new HashMap<String, LinkedList<ProtocolFile>>();
+  static Map<String, List<HttpFile>> linkChildren = new HashMap<String, List<HttpFile>>();
 
   static boolean takeAllFiles = true;
 
@@ -170,7 +172,11 @@ public class HttpProtocol implements Pro
   }
 
   public List<ProtocolFile> ls() throws ProtocolException {
-    return parseLink(currentFile);
+  	List<ProtocolFile> lsResults = new ArrayList<ProtocolFile>();
+  	for (HttpFile file : parseLink(currentFile)) {
+  		lsResults.add(file);
+  	}
+    return lsResults;
   }
 
   public ProtocolFile pwd() throws ProtocolException {
@@ -186,9 +192,9 @@ public class HttpProtocol implements Pro
     return this.isConnected;
   }
 
-  public LinkedList<ProtocolFile> parseLink(HttpFile file)
+  public List<HttpFile> parseLink(HttpFile file)
       throws ProtocolException {
-    LinkedList<ProtocolFile> children = linkChildren.get(file.getLink()
+    List<HttpFile> children = linkChildren.get(file.getLink()
         .toString());
     if (file.isDir() && children == null) {
       try {
@@ -202,13 +208,8 @@ public class HttpProtocol implements Pro
         }
 
         // Find links in URL.
-        List<Link> links = HttpUtils.findLinks(conn);
-        
-        // Convert links to HttpFiles.
-        children = new LinkedList<ProtocolFile>();
-        for (Link link : links) {
-          children.add(HttpUtils.toHttpFile(link, file));
-        }
+        children = new LinkedList<HttpFile>();
+        children.addAll(HttpUtils.findLinks(file));
         
         // Save children links found.
         linkChildren.put(file.getLink().toString(), children);
@@ -301,12 +302,12 @@ public class HttpProtocol implements Pro
       if (st.hasMoreTokens()) {
         do {
           String token = st.nextToken();
-          LinkedList<ProtocolFile> children = this.parseLink(curPath);
-          for (ProtocolFile pFile : children) {
+          List<HttpFile> children = this.parseLink(curPath);
+          for (HttpFile pFile : children) {
             if (pFile.getName().equals(token)) {
               // System.out.println("token " + token + " " +
               // pFile);
-              curPath = (HttpFile) pFile;
+              curPath = pFile;
               continue;
             }
           }

Modified: oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
URL: http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java (original)
+++ oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java Thu Jun  2 23:26:28 2011
@@ -19,12 +19,16 @@ package org.apache.oodt.cas.protocol.htt
 //JDK imports
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 //APACHE imports
 import org.apache.commons.lang.Validate;
@@ -36,6 +40,7 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.parser.html.HtmlParser;
 import org.apache.tika.sax.Link;
 import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.SAXException;
 
 /**
@@ -62,7 +67,9 @@ public class HttpUtils {
 	public static URI resolveUri(URI base, String path) throws URISyntaxException {
 		Validate.notNull(base, "base URI must not be NULL");
 		Validate.notNull(path, "resolve path must not be NULL");
-		if (path.startsWith("/")) {
+		if (path.startsWith("http://")) {
+			return new URI(path);
+		} else if (path.startsWith("/")) {
 			return new URI(base.getScheme() + "://" + base.getHost() + path);
 		} else {
 			if (base.toString().endsWith("/")) {
@@ -96,13 +103,18 @@ public class HttpUtils {
     return sb.toString();
   }
 
-	public static List<Link> findLinks(HttpURLConnection conn) throws IOException, SAXException, TikaException {
-    LinkContentHandler handler = new LinkContentHandler();
-    
-    new HtmlParser().parse(new ByteArrayInputStream(HttpUtils.readUrl(conn).getBytes()),
-        handler, new Metadata(), new ParseContext());
-    
-    return handler.getLinks();
+	public static List<HttpFile> findLinks(HttpFile file) throws IOException, URISyntaxException {
+		// Pattern looking for <a href="(group-1)">(group-2)</a>
+		Pattern linkPattern = Pattern.compile("<\\s*a\\s+href\\s*=\\s*\"(.+?)\"\\s*>(.+?)<\\s*/\\s*a\\s*>"); 
+		Matcher matcher = linkPattern.matcher(HttpUtils.readUrl(connect(file.getLink())));
+		List<HttpFile> httpFiles = new ArrayList<HttpFile>();
+		while (matcher.find()) {
+			String link = matcher.group(1);
+			String virtualPath = matcher.group(2);
+			URL url = resolveUri(file.getLink().toURI(), link).toURL();
+			httpFiles.add(new HttpFile(link, isDirectory(url, virtualPath), url, file));
+		}
+		return httpFiles;
 	}
 	
 	public static HttpFile toHttpFile(Link link, HttpFile parent) throws IOException {

Modified: oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java
URL: http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java (original)
+++ oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java Thu Jun  2 23:26:28 2011
@@ -23,6 +23,12 @@ import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.util.List;
+
+import org.apache.oodt.cas.protocol.http.HttpFile;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.Link;
+import org.xml.sax.SAXException;
 
 //JUnits imports
 import junit.framework.TestCase;
@@ -34,7 +40,11 @@ import junit.framework.TestCase;
  */
 public class TestHttpUtils extends TestCase {
 	
-	private static final String URL_OF_THIS_TEST = "http://svn.apache.org/repos/asf/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java";
+	private static final String APACHE_SVN_SITE = "http://svn.apache.org";
+	
+	private static final String PROTOCOL_HTTP_SVN_LOC = "/repos/asf/oodt/branches/protocol/protocol-http";
+	private static final String PARENT_URL_OF_THIS_TEST = PROTOCOL_HTTP_SVN_LOC + "/src/test/org/apache/oodt/cas/protocol/http/util";
+	private static final String URL_OF_THIS_TEST = PARENT_URL_OF_THIS_TEST + "/TestHttpUtils.java";
 	
 	public void testResolveUri() throws URISyntaxException {
 		URI baseUri = new URI("http://localhost/base/directory/");
@@ -53,11 +63,33 @@ public class TestHttpUtils extends TestC
 	}
 	
 	public void testConnectUrl() throws MalformedURLException, IOException {
-		HttpURLConnection conn = HttpUtils.connect(new URL(URL_OF_THIS_TEST));
+		HttpURLConnection conn = HttpUtils.connect(new URL(APACHE_SVN_SITE + URL_OF_THIS_TEST));
 		assertNotSame(0, conn.getDate());
 		String urlText = HttpUtils.readUrl(conn);
 		assertTrue(urlText.contains("public class TestHttpUtils extends TestCase {"));
 		conn.disconnect();
 	}
+	
+	public void testRedirector() throws MalformedURLException {
+		URL url = new URL("http://localhost:80");
+		URL redirectedURL = new URL("http://localhost:8080");
+		assertFalse(HttpUtils.checkForRedirection(url, url));
+		assertTrue(HttpUtils.checkForRedirection(url, redirectedURL));
+	}
 
+	public void testFindLinks() throws MalformedURLException, IOException, URISyntaxException {
+		URL url = new URL(APACHE_SVN_SITE + PARENT_URL_OF_THIS_TEST);
+		HttpFile parent = new HttpFile(PARENT_URL_OF_THIS_TEST, true, url, null);
+		HttpURLConnection conn = HttpUtils.connect(url);
+		System.out.println(HttpUtils.readUrl(conn));
+		List<HttpFile> httpFiles = HttpUtils.findLinks(parent);
+		boolean foundThisTest = false;
+		for (HttpFile httpFile : httpFiles) {
+			if (httpFile.getName().equals("TestHttpUtils.java")) {
+				foundThisTest = true;
+				break;
+			}
+		}
+		assertTrue(foundThisTest);
+	}
 }