You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oodt.apache.org by bf...@apache.org on 2011/06/03 01:26:29 UTC
svn commit: r1130871 - in /oodt/branches/protocol/protocol-http/src:
main/java/org/apache/oodt/cas/protocol/http/
main/java/org/apache/oodt/cas/protocol/http/util/
test/org/apache/oodt/cas/protocol/http/util/
Author: bfoster
Date: Thu Jun 2 23:26:28 2011
New Revision: 1130871
URL: http://svn.apache.org/viewvc?rev=1130871&view=rev
Log:
- refactoring to better support unit-testing
-----------------
OODT-194
Modified:
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java
Modified: oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
URL: http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java (original)
+++ oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java Thu Jun 2 23:26:28 2011
@@ -42,9 +42,11 @@ import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import java.util.Scanner;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
@@ -67,7 +69,7 @@ public class HttpProtocol implements Pro
static String IGNORE = "ignore";
- static HashMap<String, LinkedList<ProtocolFile>> linkChildren = new HashMap<String, LinkedList<ProtocolFile>>();
+ static Map<String, List<HttpFile>> linkChildren = new HashMap<String, List<HttpFile>>();
static boolean takeAllFiles = true;
@@ -170,7 +172,11 @@ public class HttpProtocol implements Pro
}
public List<ProtocolFile> ls() throws ProtocolException {
- return parseLink(currentFile);
+ List<ProtocolFile> lsResults = new ArrayList<ProtocolFile>();
+ for (HttpFile file : parseLink(currentFile)) {
+ lsResults.add(file);
+ }
+ return lsResults;
}
public ProtocolFile pwd() throws ProtocolException {
@@ -186,9 +192,9 @@ public class HttpProtocol implements Pro
return this.isConnected;
}
- public LinkedList<ProtocolFile> parseLink(HttpFile file)
+ public List<HttpFile> parseLink(HttpFile file)
throws ProtocolException {
- LinkedList<ProtocolFile> children = linkChildren.get(file.getLink()
+ List<HttpFile> children = linkChildren.get(file.getLink()
.toString());
if (file.isDir() && children == null) {
try {
@@ -202,13 +208,8 @@ public class HttpProtocol implements Pro
}
// Find links in URL.
- List<Link> links = HttpUtils.findLinks(conn);
-
- // Convert links to HttpFiles.
- children = new LinkedList<ProtocolFile>();
- for (Link link : links) {
- children.add(HttpUtils.toHttpFile(link, file));
- }
+ children = new LinkedList<HttpFile>();
+ children.addAll(HttpUtils.findLinks(file));
// Save children links found.
linkChildren.put(file.getLink().toString(), children);
@@ -301,12 +302,12 @@ public class HttpProtocol implements Pro
if (st.hasMoreTokens()) {
do {
String token = st.nextToken();
- LinkedList<ProtocolFile> children = this.parseLink(curPath);
- for (ProtocolFile pFile : children) {
+ List<HttpFile> children = this.parseLink(curPath);
+ for (HttpFile pFile : children) {
if (pFile.getName().equals(token)) {
// System.out.println("token " + token + " " +
// pFile);
- curPath = (HttpFile) pFile;
+ curPath = pFile;
continue;
}
}
Modified: oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
URL: http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java (original)
+++ oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java Thu Jun 2 23:26:28 2011
@@ -19,12 +19,16 @@ package org.apache.oodt.cas.protocol.htt
//JDK imports
import java.io.ByteArrayInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
+import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
//APACHE imports
import org.apache.commons.lang.Validate;
@@ -36,6 +40,7 @@ import org.apache.tika.parser.ParseConte
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.sax.Link;
import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
/**
@@ -62,7 +67,9 @@ public class HttpUtils {
public static URI resolveUri(URI base, String path) throws URISyntaxException {
Validate.notNull(base, "base URI must not be NULL");
Validate.notNull(path, "resolve path must not be NULL");
- if (path.startsWith("/")) {
+ if (path.startsWith("http://")) {
+ return new URI(path);
+ } else if (path.startsWith("/")) {
return new URI(base.getScheme() + "://" + base.getHost() + path);
} else {
if (base.toString().endsWith("/")) {
@@ -96,13 +103,18 @@ public class HttpUtils {
return sb.toString();
}
- public static List<Link> findLinks(HttpURLConnection conn) throws IOException, SAXException, TikaException {
- LinkContentHandler handler = new LinkContentHandler();
-
- new HtmlParser().parse(new ByteArrayInputStream(HttpUtils.readUrl(conn).getBytes()),
- handler, new Metadata(), new ParseContext());
-
- return handler.getLinks();
+ public static List<HttpFile> findLinks(HttpFile file) throws IOException, URISyntaxException {
+ // Pattern looking for <a href="(group-1)">(group-2)</a>
+ Pattern linkPattern = Pattern.compile("<\\s*a\\s+href\\s*=\\s*\"(.+?)\"\\s*>(.+?)<\\s*/\\s*a\\s*>");
+ Matcher matcher = linkPattern.matcher(HttpUtils.readUrl(connect(file.getLink())));
+ List<HttpFile> httpFiles = new ArrayList<HttpFile>();
+ while (matcher.find()) {
+ String link = matcher.group(1);
+ String virtualPath = matcher.group(2);
+ URL url = resolveUri(file.getLink().toURI(), link).toURL();
+ httpFiles.add(new HttpFile(link, isDirectory(url, virtualPath), url, file));
+ }
+ return httpFiles;
}
public static HttpFile toHttpFile(Link link, HttpFile parent) throws IOException {
Modified: oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java
URL: http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java (original)
+++ oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java Thu Jun 2 23:26:28 2011
@@ -23,6 +23,12 @@ import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
+import java.util.List;
+
+import org.apache.oodt.cas.protocol.http.HttpFile;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.Link;
+import org.xml.sax.SAXException;
//JUnits imports
import junit.framework.TestCase;
@@ -34,7 +40,11 @@ import junit.framework.TestCase;
*/
public class TestHttpUtils extends TestCase {
- private static final String URL_OF_THIS_TEST = "http://svn.apache.org/repos/asf/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java";
+ private static final String APACHE_SVN_SITE = "http://svn.apache.org";
+
+ private static final String PROTOCOL_HTTP_SVN_LOC = "/repos/asf/oodt/branches/protocol/protocol-http";
+ private static final String PARENT_URL_OF_THIS_TEST = PROTOCOL_HTTP_SVN_LOC + "/src/test/org/apache/oodt/cas/protocol/http/util";
+ private static final String URL_OF_THIS_TEST = PARENT_URL_OF_THIS_TEST + "/TestHttpUtils.java";
public void testResolveUri() throws URISyntaxException {
URI baseUri = new URI("http://localhost/base/directory/");
@@ -53,11 +63,33 @@ public class TestHttpUtils extends TestC
}
public void testConnectUrl() throws MalformedURLException, IOException {
- HttpURLConnection conn = HttpUtils.connect(new URL(URL_OF_THIS_TEST));
+ HttpURLConnection conn = HttpUtils.connect(new URL(APACHE_SVN_SITE + URL_OF_THIS_TEST));
assertNotSame(0, conn.getDate());
String urlText = HttpUtils.readUrl(conn);
assertTrue(urlText.contains("public class TestHttpUtils extends TestCase {"));
conn.disconnect();
}
+
+ public void testRedirector() throws MalformedURLException {
+ URL url = new URL("http://localhost:80");
+ URL redirectedURL = new URL("http://localhost:8080");
+ assertFalse(HttpUtils.checkForRedirection(url, url));
+ assertTrue(HttpUtils.checkForRedirection(url, redirectedURL));
+ }
+ public void testFindLinks() throws MalformedURLException, IOException, URISyntaxException {
+ URL url = new URL(APACHE_SVN_SITE + PARENT_URL_OF_THIS_TEST);
+ HttpFile parent = new HttpFile(PARENT_URL_OF_THIS_TEST, true, url, null);
+ HttpURLConnection conn = HttpUtils.connect(url);
+ System.out.println(HttpUtils.readUrl(conn));
+ List<HttpFile> httpFiles = HttpUtils.findLinks(parent);
+ boolean foundThisTest = false;
+ for (HttpFile httpFile : httpFiles) {
+ if (httpFile.getName().equals("TestHttpUtils.java")) {
+ foundThisTest = true;
+ break;
+ }
+ }
+ assertTrue(foundThisTest);
+ }
}