You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2011/12/05 03:20:05 UTC

svn commit: r1210317 - in /incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika: TikaDocumentParser.java TikaHtmlParser.java

Author: rfrovarp
Date: Mon Dec  5 03:20:04 2011
New Revision: 1210317

URL: http://svn.apache.org/viewvc?rev=1210317&view=rev
Log:
Fix DROIDS-161: resolve relative link URIs (those without a scheme) against the task URI

Modified:
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java

Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=1210317&r1=1210316&r2=1210317&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Mon Dec  5 03:20:04 2011
@@ -96,7 +96,13 @@ public class TikaDocumentParser implemen
       if (task instanceof LinkTask) {
 	      for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
 	        try {
-	          extractedTasks.add(new LinkTask((LinkTask)task, new URI(tikaLink.getUri()), depth, tikaLink.getText()));
+	          URI uri = new URI(tikaLink.getUri());
+            // A URI with no scheme (getScheme() returns null) is relative,
+            // so resolve it against the task URI to obtain an absolute URI
+            if(uri.getScheme() == null) {
+              uri = ((Link) task).getURI().resolve(uri);
+            }
+            extractedTasks.add(new LinkTask((Link)task, uri, depth, tikaLink.getText()));
 	        } catch (URISyntaxException e) {
 	          if(log.isWarnEnabled()) {
 	            log.warn("URI not valid: "+ tikaLink.getUri());

Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1210317&r1=1210316&r2=1210317&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Mon Dec  5 03:20:04 2011
@@ -99,7 +99,13 @@ public class TikaHtmlParser implements T
 	      int depth = task.getDepth() + 1;
 	      for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
 	        try {
-	          extractedTasks.add(new LinkTask((Link)task, new URI(tikaLink.getUri()), depth, tikaLink.getText()));
+	          URI uri = new URI(tikaLink.getUri());
+	          // A URI with no scheme (getScheme() returns null) is relative,
+	          // so resolve it against the task URI to obtain an absolute URI
+	          if(uri.getScheme() == null) {
+	            uri = ((Link) task).getURI().resolve(uri);
+	          }
+	          extractedTasks.add(new LinkTask((Link)task, uri, depth, tikaLink.getText()));
 	        } catch (URISyntaxException e) {
 	          if(log.isWarnEnabled()) {
 	            log.warn("URI not valid: "+ tikaLink.getUri());