You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2012/11/12 22:20:56 UTC
svn commit: r1408465 - in /nutch/branches/2.x: CHANGES.txt
src/java/org/apache/nutch/util/TableUtil.java
src/test/org/apache/nutch/util/TestTableUtil.java
Author: snagel
Date: Mon Nov 12 21:20:55 2012
New Revision: 1408465
URL: http://svn.apache.org/viewvc?rev=1408465&view=rev
Log:
NUTCH-1484 TableUtil unreverseURL fails on file:// URLs
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java
nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1408465&r1=1408464&r2=1408465&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Nov 12 21:20:55 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1484 TableUtil unreverseURL fails on file:// URLs (Rogério Pereira Araújo via snagel)
+
* NUTCH-1451 Upgrade automaton jar to 1.11-8 (lewismc)
* NUTCH-1496 ParserJob logs skipped urls with level info (Nathan Gass via lewismc)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java?rev=1408465&r1=1408464&r2=1408465&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/TableUtil.java Mon Nov 12 21:20:55 2012
@@ -93,8 +93,8 @@ public class TableUtil {
pathBegin = reversedUrl.length();
String sub = reversedUrl.substring(0, pathBegin);
- String[] splits = StringUtils.split(sub, ':'); // {<reversed host>, <protocol>[, <port>]}
-
+ String[] splits = StringUtils.splitPreserveAllTokens(sub, ':'); // {<reversed host>, <protocol>[, <port>]}
+
buf.append(splits[1]); // add protocol
buf.append("://");
reverseAppendSplits(splits[0], buf); // splits[0] is reversed
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java?rev=1408465&r1=1408464&r2=1408465&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestTableUtil.java Mon Nov 12 21:20:55 2012
@@ -28,6 +28,7 @@ public class TestTableUtil extends TestC
String urlString5 = "http://foo.com?a=/a/b&c=0";
String urlString5rev = "http://foo.com/?a=/a/b&c=0";
String urlString6 = "http://foo.com";
+ String urlString7 = "file:///var/www/index.html";
String reversedUrlString1 = "com.foo:http/";
String reversedUrlString2 = "com.foo:http:8900/";
@@ -35,6 +36,7 @@ public class TestTableUtil extends TestC
String reversedUrlString4 = "com.baz.bar:http:8983/to/index.html?a=b&c=d";
String reversedUrlString5 = "com.foo:http/?a=/a/b&c=0";
String reversedUrlString6 = "com.foo:http";
+ String reversedUrlString7 = ":file/var/www/index.html";
public void testReverseUrl() throws Exception {
assertReverse(urlString1, reversedUrlString1);
@@ -44,6 +46,7 @@ public class TestTableUtil extends TestC
assertReverse(urlString5, reversedUrlString5);
assertReverse(urlString5, reversedUrlString5);
assertReverse(urlString6, reversedUrlString6);
+ assertReverse(urlString7, reversedUrlString7);
}
public void testUnreverseUrl() throws Exception {
@@ -53,6 +56,7 @@ public class TestTableUtil extends TestC
assertUnreverse(reversedUrlString4, urlString4);
assertUnreverse(reversedUrlString5, urlString5rev);
assertUnreverse(reversedUrlString6, urlString6);
+ assertUnreverse(reversedUrlString7, urlString7);
}
private static void assertReverse(String url, String expectedReversedUrl) throws Exception {