You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2011/12/05 03:20:05 UTC
svn commit: r1210317 - in
/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika:
TikaDocumentParser.java TikaHtmlParser.java
Author: rfrovarp
Date: Mon Dec 5 03:20:04 2011
New Revision: 1210317
URL: http://svn.apache.org/viewvc?rev=1210317&view=rev
Log:
Fix DROIDS-161
Modified:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=1210317&r1=1210316&r2=1210317&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Mon Dec 5 03:20:04 2011
@@ -96,7 +96,13 @@ public class TikaDocumentParser implemen
if (task instanceof LinkTask) {
for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
try {
- extractedTasks.add(new LinkTask((LinkTask)task, new URI(tikaLink.getUri()), depth, tikaLink.getText()));
+ URI uri = new URI(tikaLink.getUri());
+ // Check whether the URI has no scheme (getScheme() returns null).
+ // A scheme-less URI is a relative reference, so resolve it against the task URI.
+ if(uri.getScheme() == null) {
+ uri = ((Link) task).getURI().resolve(uri);
+ }
+ extractedTasks.add(new LinkTask((Link)task, uri, depth, tikaLink.getText()));
} catch (URISyntaxException e) {
if(log.isWarnEnabled()) {
log.warn("URI not valid: "+ tikaLink.getUri());
Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1210317&r1=1210316&r2=1210317&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Mon Dec 5 03:20:04 2011
@@ -99,7 +99,13 @@ public class TikaHtmlParser implements T
int depth = task.getDepth() + 1;
for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
try {
- extractedTasks.add(new LinkTask((Link)task, new URI(tikaLink.getUri()), depth, tikaLink.getText()));
+ URI uri = new URI(tikaLink.getUri());
+ // Check whether the URI has no scheme (getScheme() returns null).
+ // A scheme-less URI is a relative reference, so resolve it against the task URI.
+ if(uri.getScheme() == null) {
+ uri = ((Link) task).getURI().resolve(uri);
+ }
+ extractedTasks.add(new LinkTask((Link)task, uri, depth, tikaLink.getText()));
} catch (URISyntaxException e) {
if(log.isWarnEnabled()) {
log.warn("URI not valid: "+ tikaLink.getUri());