You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2015/03/19 05:23:06 UTC
svn commit: r1667651 - in /nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/util/DumpFileUtil.java
src/test/org/apache/nutch/util/DumpFileUtilTest.java
Author: mattmann
Date: Thu Mar 19 04:23:06 2015
New Revision: 1667651
URL: http://svn.apache.org/r1667651
Log:
Fix for NUTCH-1968: "File name too long" issue in DumpFileUtil.java, contributed by Renxia Wang. This closes #14.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java
nutch/trunk/src/test/org/apache/nutch/util/DumpFileUtilTest.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1667651&r1=1667650&r2=1667651&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Mar 19 04:23:06 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1968 File Name too long issue of DumpFileUtil.java file (Xin Zhang, Renxia Wang via mattmann)
+
* NUTCH-1966 Configuration endpoint for 1x REST API (Sujen Shah via mattmann)
* NUTCH-1967 Possible SIooBE in MimeAdaptiveFetchSchedule (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java?rev=1667651&r1=1667650&r2=1667651&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java Thu Mar 19 04:23:06 2015
@@ -33,7 +33,8 @@ public class DumpFileUtil {
private final static String DIR_PATTERN = "%s/%s/%s";
private final static String FILENAME_PATTERN = "%s_%s.%s";
private final static Integer MAX_LENGTH_OF_FILENAME = 32;
-
+ private final static Integer MAX_LENGTH_OF_EXTENSION = 5;
+
public static String getUrlMD5(String url) {
byte[] digest = MD5Hash.digest(url).getDigest();
@@ -64,9 +65,14 @@ public class DumpFileUtil {
public static String createFileName(String md5, String fileBaseName, String fileExtension) {
if (fileBaseName.length() > MAX_LENGTH_OF_FILENAME) {
LOG.info("File name is too long. Truncated to {} characters.", MAX_LENGTH_OF_FILENAME);
- return String.format(FILENAME_PATTERN, md5, StringUtils.substring(fileBaseName, 0, MAX_LENGTH_OF_FILENAME), fileExtension);
- } else {
- return String.format(FILENAME_PATTERN, md5, fileBaseName, fileExtension);
+ fileBaseName = StringUtils.substring(fileBaseName, 0, MAX_LENGTH_OF_FILENAME);
+ }
+
+ if (fileExtension.length() > MAX_LENGTH_OF_EXTENSION) {
+ LOG.info("File extension is too long. Truncated to {} characters.", MAX_LENGTH_OF_EXTENSION);
+ fileExtension = StringUtils.substring(fileExtension, 0, MAX_LENGTH_OF_EXTENSION);
}
+
+ return String.format(FILENAME_PATTERN, md5, fileBaseName, fileExtension);
}
}
Modified: nutch/trunk/src/test/org/apache/nutch/util/DumpFileUtilTest.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/util/DumpFileUtilTest.java?rev=1667651&r1=1667650&r2=1667651&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/util/DumpFileUtilTest.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/util/DumpFileUtilTest.java Thu Mar 19 04:23:06 2015
@@ -59,5 +59,10 @@ public class DumpFileUtilTest {
String fullDir2 = DumpFileUtil.createFileName(DumpFileUtil.getUrlMD5(testUrl), tooLongBaseName, extension);
assertEquals("991e599262e04ea2ec76b6c5aed499a7_testtesttesttesttesttesttesttest.html", fullDir2);
+
+ String tooLongExtension = "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
+ String fullDir3 = DumpFileUtil.createFileName(DumpFileUtil.getUrlMD5(testUrl), baseName, tooLongExtension);
+
+ assertEquals("991e599262e04ea2ec76b6c5aed499a7_test.testt", fullDir3);
}
}