You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2016/02/01 19:40:57 UTC
[38/50] [abbrv] hadoop git commit: MAPREDUCE-6616. Fail to create
jobhistory file if there are some multibyte characters in the job name.
Contributed by Kousuke Saruta.
MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte characters in the job name. Contributed by Kousuke Saruta.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/df99ea8a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/df99ea8a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/df99ea8a
Branch: refs/heads/HDFS-7240
Commit: df99ea8a92d600e669606d41d3887bd004e7a3cc
Parents: 8ee0603
Author: Akira Ajisaka <aa...@apache.org>
Authored: Fri Jan 29 16:19:28 2016 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Fri Jan 29 16:20:29 2016 +0900
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 3 +
.../v2/jobhistory/FileNameIndexUtils.java | 171 +++++++++++-----
.../v2/jobhistory/TestFileNameIndexUtils.java | 199 ++++++++++++++++---
3 files changed, 296 insertions(+), 77 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/df99ea8a/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 08cd1d3..8261b34 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -709,6 +709,9 @@ Release 2.8.0 - UNRELEASED
MAPREDUCE-6563. Streaming documentation contains a stray '%' character.
(cnauroth)
+ MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte
+ characters in the job name. (Kousuke Saruta via aajisaka)
+
Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/df99ea8a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java
index eb0c54c..284fe80 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
+import static java.nio.charset.StandardCharsets.UTF_8;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -35,7 +36,7 @@ public class FileNameIndexUtils {
// Sanitize job history file for predictable parsing
static final String DELIMITER = "-";
static final String DELIMITER_ESCAPE = "%2D";
-
+
private static final Log LOG = LogFactory.getLog(FileNameIndexUtils.class);
// Job history file names need to be backwards compatible
@@ -57,7 +58,8 @@ public class FileNameIndexUtils {
* @param indexInfo the index info.
* @return the done job history filename.
*/
- public static String getDoneFileName(JobIndexInfo indexInfo) throws IOException {
+ public static String getDoneFileName(JobIndexInfo indexInfo)
+ throws IOException {
return getDoneFileName(indexInfo,
JHAdminConfig.DEFAULT_MR_HS_JOBNAME_LIMIT);
}
@@ -66,49 +68,58 @@ public class FileNameIndexUtils {
int jobNameLimit) throws IOException {
StringBuilder sb = new StringBuilder();
//JobId
- sb.append(escapeDelimiters(TypeConverter.fromYarn(indexInfo.getJobId()).toString()));
+ sb.append(encodeJobHistoryFileName(escapeDelimiters(
+ TypeConverter.fromYarn(indexInfo.getJobId()).toString())));
sb.append(DELIMITER);
-
+
//SubmitTime
- sb.append(indexInfo.getSubmitTime());
+ sb.append(encodeJobHistoryFileName(String.valueOf(
+ indexInfo.getSubmitTime())));
sb.append(DELIMITER);
-
+
//UserName
- sb.append(escapeDelimiters(getUserName(indexInfo)));
+ sb.append(encodeJobHistoryFileName(escapeDelimiters(
+ getUserName(indexInfo))));
sb.append(DELIMITER);
-
+
//JobName
- sb.append(escapeDelimiters(trimJobName(
- getJobName(indexInfo), jobNameLimit)));
+ sb.append(trimURLEncodedString(encodeJobHistoryFileName(escapeDelimiters(
+ getJobName(indexInfo))), jobNameLimit));
sb.append(DELIMITER);
-
+
//FinishTime
- sb.append(indexInfo.getFinishTime());
+ sb.append(encodeJobHistoryFileName(
+ String.valueOf(indexInfo.getFinishTime())));
sb.append(DELIMITER);
-
+
//NumMaps
- sb.append(indexInfo.getNumMaps());
+ sb.append(encodeJobHistoryFileName(
+ String.valueOf(indexInfo.getNumMaps())));
sb.append(DELIMITER);
-
+
//NumReduces
- sb.append(indexInfo.getNumReduces());
+ sb.append(encodeJobHistoryFileName(
+ String.valueOf(indexInfo.getNumReduces())));
sb.append(DELIMITER);
-
+
//JobStatus
- sb.append(indexInfo.getJobStatus());
+ sb.append(encodeJobHistoryFileName(indexInfo.getJobStatus()));
sb.append(DELIMITER);
-
+
//QueueName
- sb.append(escapeDelimiters(getQueueName(indexInfo)));
+ sb.append(escapeDelimiters(encodeJobHistoryFileName(
+ getQueueName(indexInfo))));
sb.append(DELIMITER);
//JobStartTime
- sb.append(indexInfo.getJobStartTime());
+ sb.append(encodeJobHistoryFileName(
+ String.valueOf(indexInfo.getJobStartTime())));
- sb.append(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION);
- return encodeJobHistoryFileName(sb.toString());
+ sb.append(encodeJobHistoryFileName(
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
+ return sb.toString();
}
-
+
/**
* Parses the provided job history file name to construct a
* JobIndexInfo object which is returned.
@@ -116,21 +127,24 @@ public class FileNameIndexUtils {
* @param jhFileName the job history filename.
* @return a JobIndexInfo object built from the filename.
*/
- public static JobIndexInfo getIndexInfo(String jhFileName) throws IOException {
- String fileName = jhFileName.substring(0, jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
+ public static JobIndexInfo getIndexInfo(String jhFileName)
+ throws IOException {
+ String fileName = jhFileName.substring(0,
+ jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
JobIndexInfo indexInfo = new JobIndexInfo();
-
+
String[] jobDetails = fileName.split(DELIMITER);
-
- JobID oldJobId = JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
+
+ JobID oldJobId =
+ JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
JobId jobId = TypeConverter.toYarn(oldJobId);
indexInfo.setJobId(jobId);
// Do not fail if there are some minor parse errors
try {
try {
- indexInfo.setSubmitTime(
- Long.parseLong(decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
+ indexInfo.setSubmitTime(Long.parseLong(
+ decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
} catch (NumberFormatException e) {
LOG.warn("Unable to parse submit time from job history file "
+ jhFileName + " : " + e);
@@ -143,24 +157,24 @@ public class FileNameIndexUtils {
decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX]));
try {
- indexInfo.setFinishTime(
- Long.parseLong(decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
+ indexInfo.setFinishTime(Long.parseLong(
+ decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
} catch (NumberFormatException e) {
LOG.warn("Unable to parse finish time from job history file "
+ jhFileName + " : " + e);
}
try {
- indexInfo.setNumMaps(
- Integer.parseInt(decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
+ indexInfo.setNumMaps(Integer.parseInt(
+ decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
} catch (NumberFormatException e) {
LOG.warn("Unable to parse num maps from job history file "
+ jhFileName + " : " + e);
}
try {
- indexInfo.setNumReduces(
- Integer.parseInt(decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
+ indexInfo.setNumReduces(Integer.parseInt(
+ decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
} catch (NumberFormatException e) {
LOG.warn("Unable to parse num reduces from job history file "
+ jhFileName + " : " + e);
@@ -176,8 +190,8 @@ public class FileNameIndexUtils {
if (jobDetails.length <= JOB_START_TIME_INDEX) {
indexInfo.setJobStartTime(indexInfo.getSubmitTime());
} else {
- indexInfo.setJobStartTime(
- Long.parseLong(decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX])));
+ indexInfo.setJobStartTime(Long.parseLong(
+ decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX])));
}
} catch (NumberFormatException e){
LOG.warn("Unable to parse start time from job history file "
@@ -187,13 +201,13 @@ public class FileNameIndexUtils {
LOG.warn("Parsing job history file with partial data encoded into name: "
+ jhFileName);
}
-
+
return indexInfo;
}
/**
- * Helper function to encode the URL of the filename of the job-history
+ * Helper function to encode the URL of the filename of the job-history
* log file.
*
* @param logFileName file name of the job-history file
@@ -208,7 +222,8 @@ public class FileNameIndexUtils {
if (logFileName.contains(DELIMITER_ESCAPE)) {
replacementDelimiterEscape = nonOccursString(logFileName);
- logFileName = logFileName.replaceAll(DELIMITER_ESCAPE, replacementDelimiterEscape);
+ logFileName = logFileName.replaceAll(
+ DELIMITER_ESCAPE, replacementDelimiterEscape);
}
String encodedFileName = null;
@@ -223,14 +238,15 @@ public class FileNameIndexUtils {
// Restore protected escape delimiters after encoding
if (replacementDelimiterEscape != null) {
- encodedFileName = encodedFileName.replaceAll(replacementDelimiterEscape, DELIMITER_ESCAPE);
+ encodedFileName = encodedFileName.replaceAll(
+ replacementDelimiterEscape, DELIMITER_ESCAPE);
}
return encodedFileName;
}
-
+
/**
- * Helper function to decode the URL of the filename of the job-history
+ * Helper function to decode the URL of the filename of the job-history
* log file.
*
* @param logFileName file name of the job-history file
@@ -250,7 +266,7 @@ public class FileNameIndexUtils {
}
return decodedFileName;
}
-
+
static String nonOccursString(String logFileName) {
int adHocIndex = 0;
@@ -262,11 +278,11 @@ public class FileNameIndexUtils {
return unfoundString + "q";
}
-
+
private static String getUserName(JobIndexInfo indexInfo) {
return getNonEmptyString(indexInfo.getUser());
}
-
+
private static String getJobName(JobIndexInfo indexInfo) {
return getNonEmptyString(indexInfo.getJobName());
}
@@ -283,18 +299,65 @@ public class FileNameIndexUtils {
}
return in;
}
-
+
private static String escapeDelimiters(String escapee) {
return escapee.replaceAll(DELIMITER, DELIMITER_ESCAPE);
}
/**
- * Trims the job-name if required
+ * Trims the url-encoded string if required
*/
- private static String trimJobName(String jobName, int jobNameLimit) {
- if (jobName.length() > jobNameLimit) {
- jobName = jobName.substring(0, jobNameLimit);
+ private static String trimURLEncodedString(
+ String encodedString, int limitLength) {
+ assert(limitLength >= 0) : "limitLength should be positive integer";
+
+ if (encodedString.length() < limitLength) {
+ return encodedString;
+ }
+
+ int index = 0;
+ int increase = 0;
+ byte[] strBytes = encodedString.getBytes(UTF_8);
+
+ // calculate effective character length based on UTF-8 specification.
+ // The size of a character coded in UTF-8 should be 4-byte at most.
+ // See RFC3629
+ while (true) {
+ byte b = strBytes[index];
+ if (b == '%') {
+ byte minuend1 = strBytes[index + 1];
+ byte subtrahend1 = (byte)(Character.isDigit(
+ minuend1) ? '0' : 'A' - 10);
+ byte minuend2 = strBytes[index + 2];
+ byte subtrahend2 = (byte)(Character.isDigit(
+ minuend2) ? '0' : 'A' - 10);
+ int initialHex =
+ ((Character.toUpperCase(minuend1) - subtrahend1) << 4) +
+ (Character.toUpperCase(minuend2) - subtrahend2);
+
+ if (0x00 <= initialHex && initialHex <= 0x7F) {
+ // For 1-byte UTF-8 characters
+ increase = 3;
+ } else if (0xC2 <= initialHex && initialHex <= 0xDF) {
+ // For 2-byte UTF-8 characters
+ increase = 6;
+ } else if (0xE0 <= initialHex && initialHex <= 0xEF) {
+ // For 3-byte UTF-8 characters
+ increase = 9;
+ } else {
+ // For 4-byte UTF-8 characters
+ increase = 12;
+ }
+ } else {
+ increase = 1;
+ }
+ if (index + increase > limitLength) {
+ break;
+ } else {
+ index += increase;
+ }
}
- return jobName;
+
+ return encodedString.substring(0, index);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/df99ea8a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java
index 3d722e0..6fa7dbc 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.mapreduce.v2.jobhistory;
import java.io.IOException;
+import static java.nio.charset.StandardCharsets.UTF_8;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TypeConverter;
@@ -30,14 +31,14 @@ import org.junit.Test;
public class TestFileNameIndexUtils {
private static final String OLD_JOB_HISTORY_FILE_FORMATTER = "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
private static final String OLD_FORMAT_BEFORE_ADD_START_TIME = "%s"
+ FileNameIndexUtils.DELIMITER + "%s"
@@ -51,29 +52,29 @@ public class TestFileNameIndexUtils {
+ JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
private static final String JOB_HISTORY_FILE_FORMATTER = "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + FileNameIndexUtils.DELIMITER + "%s"
- + JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + FileNameIndexUtils.DELIMITER + "%s"
+ + JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION;
private static final String JOB_ID = "job_1317928501754_0001";
private static final String SUBMIT_TIME = "1317928742025";
private static final String USER_NAME = "username";
private static final String USER_NAME_WITH_DELIMITER = "user"
- + FileNameIndexUtils.DELIMITER + "name";
+ + FileNameIndexUtils.DELIMITER + "name";
private static final String USER_NAME_WITH_DELIMITER_ESCAPE = "user"
- + FileNameIndexUtils.DELIMITER_ESCAPE + "name";
+ + FileNameIndexUtils.DELIMITER_ESCAPE + "name";
private static final String JOB_NAME = "mapreduce";
private static final String JOB_NAME_WITH_DELIMITER = "map"
- + FileNameIndexUtils.DELIMITER + "reduce";
+ + FileNameIndexUtils.DELIMITER + "reduce";
private static final String JOB_NAME_WITH_DELIMITER_ESCAPE = "map"
- + FileNameIndexUtils.DELIMITER_ESCAPE + "reduce";
+ + FileNameIndexUtils.DELIMITER_ESCAPE + "reduce";
private static final String FINISH_TIME = "1317928754958";
private static final String NUM_MAPS = "1";
private static final String NUM_REDUCES = "1";
@@ -123,7 +124,7 @@ public class TestFileNameIndexUtils {
Assert.assertEquals("Queue name different after encoding and decoding",
info.getQueueName(), parsedInfo.getQueueName());
Assert.assertEquals("Job start time different after encoding and decoding",
- info.getJobStartTime(), parsedInfo.getJobStartTime());
+ info.getJobStartTime(), parsedInfo.getJobStartTime());
}
@Test
@@ -173,6 +174,158 @@ public class TestFileNameIndexUtils {
parsedInfo.getJobName());
}
+ /**
+ * Verify the name of jobhistory file is not greater than 255 bytes
+ * even if there are some multibyte characters in the job name.
+ */
+ @Test
+ public void testJobNameWithMultibyteChars() throws IOException {
+ JobIndexInfo info = new JobIndexInfo();
+ JobID oldJobId = JobID.forName(JOB_ID);
+ JobId jobId = TypeConverter.toYarn(oldJobId);
+ info.setJobId(jobId);
+ info.setSubmitTime(Long.parseLong(SUBMIT_TIME));
+ info.setUser(USER_NAME);
+
+ StringBuilder sb = new StringBuilder();
+ info.setFinishTime(Long.parseLong(FINISH_TIME));
+ info.setNumMaps(Integer.parseInt(NUM_MAPS));
+ info.setNumReduces(Integer.parseInt(NUM_REDUCES));
+ info.setJobStatus(JOB_STATUS);
+ info.setQueueName(QUEUE_NAME);
+ info.setJobStartTime(Long.parseLong(JOB_START_TIME));
+
+ // Test for 1 byte UTF-8 character
+ // which is encoded into 1 x 3 = 3 characters by URL encode.
+ for (int i = 0; i < 100; i++) {
+ sb.append('%');
+ }
+ String longJobName = sb.toString();
+ info.setJobName(longJobName);
+
+ String jobHistoryFile =
+ FileNameIndexUtils.getDoneFileName(info, 50);
+
+ Assert.assertTrue(jobHistoryFile.length() <= 255);
+ String trimedJobName = jobHistoryFile.split(
+ FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
+
+ // 3 x 16 < 50 < 3 x 17 so the length of trimedJobName should be 48
+ Assert.assertEquals(48, trimedJobName.getBytes(UTF_8).length);
+
+ // validate whether trimmedJobName by testing reversibility
+ byte[] trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
+ String reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
+ Assert.assertArrayEquals(trimedJobNameInByte,
+ reEncodedTrimedJobName.getBytes(UTF_8));
+ sb.setLength(0);
+
+ // Test for 2 bytes UTF-8 character
+ // which is encoded into 2 x 3 = 6 characters by URL encode.
+ for (int i = 0; i < 100; i++) {
+ sb.append('\u03A9'); // large omega
+ }
+ longJobName = sb.toString();
+ info.setJobName(longJobName);
+
+ jobHistoryFile =
+ FileNameIndexUtils.getDoneFileName(info, 27);
+
+ Assert.assertTrue(jobHistoryFile.length() <= 255);
+ trimedJobName = jobHistoryFile.split(
+ FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
+
+ // 6 x 4 < 27 < 6 x 5 so the length of trimedJobName should be 24
+ Assert.assertEquals(24, trimedJobName.getBytes(UTF_8).length);
+
+ // validate whether trimmedJobName by testing reversibility
+ trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
+ reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
+ Assert.assertArrayEquals(trimedJobNameInByte,
+ reEncodedTrimedJobName.getBytes(UTF_8));
+ sb.setLength(0);
+
+ // Test for 3 bytes UTF-8 character
+ // which is encoded into 3 x 3 = 9 characters by URL encode.
+ for (int i = 0; i < 100; i++) {
+ sb.append('\u2192'); // rightwards arrow
+ }
+ longJobName = sb.toString();
+ info.setJobName(longJobName);
+
+ jobHistoryFile =
+ FileNameIndexUtils.getDoneFileName(info, 40);
+
+ Assert.assertTrue(jobHistoryFile.length() <= 255);
+ trimedJobName = jobHistoryFile.split(
+ FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
+
+ // 9 x 4 < 40 < 9 x 5 so the length of trimedJobName should be 36
+ Assert.assertEquals(36, trimedJobName.getBytes(UTF_8).length);
+
+ // validate whether trimmedJobName by testing reversibility
+ trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
+ reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
+ Assert.assertArrayEquals(trimedJobNameInByte,
+ reEncodedTrimedJobName.getBytes(UTF_8));
+ sb.setLength(0);
+
+ // Test for 4 bytes UTF-8 character
+ // which is encoded into 4 x 3 = 12 characters by URL encode.
+ for (int i = 0; i < 100; i++) {
+ sb.append("\uD867\uDE3D"); // Mugil cephalus in Kanji.
+ }
+ longJobName = sb.toString();
+ info.setJobName(longJobName);
+
+ jobHistoryFile =
+ FileNameIndexUtils.getDoneFileName(info, 49);
+
+ Assert.assertTrue(jobHistoryFile.length() <= 255);
+ trimedJobName = jobHistoryFile.split(
+ FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
+
+ // 12 x 4 < 49 < 12 x 5 so the length of trimedJobName should be 48
+ Assert.assertEquals(48, trimedJobName.getBytes(UTF_8).length);
+
+ // validate whether trimmedJobName by testing reversibility
+ trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
+ reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
+ Assert.assertArrayEquals(trimedJobNameInByte,
+ reEncodedTrimedJobName.getBytes(UTF_8));
+ sb.setLength(0);
+
+ // Test for the combination of 1 to 4 bytes UTF-8 characters
+ sb.append('\u732B') // cat in Kanji (encoded into 3 bytes x 3 characters)
+ .append("[") // (encoded into 1 byte x 3 characters)
+ .append('\u03BB') // small lambda (encoded into 2 bytes x 3 characters)
+ .append('/') // (encoded into 1 byte x 3 characters)
+ .append('A') // not url-encoded (1 byte x 1 character)
+ .append("\ud867\ude49") // flying fish in
+ // Kanji (encoded into 4 bytes x 3 characters)
+ .append('\u72AC'); // dog in Kanji (encoded into 3 bytes x 3 characters)
+
+ longJobName = sb.toString();
+ info.setJobName(longJobName);
+
+ jobHistoryFile =
+ FileNameIndexUtils.getDoneFileName(info, 23);
+
+ Assert.assertTrue(jobHistoryFile.length() <= 255);
+ trimedJobName = jobHistoryFile.split(
+ FileNameIndexUtils.DELIMITER)[3]; // 3 is index of job name
+
+ // total size of the first 5 characters = 22
+ // 23 < total size of the first 6 characters
+ Assert.assertEquals(22, trimedJobName.getBytes(UTF_8).length);
+
+ // validate whether trimmedJobName by testing reversibility
+ trimedJobNameInByte = trimedJobName.getBytes(UTF_8);
+ reEncodedTrimedJobName = new String(trimedJobNameInByte, UTF_8);
+ Assert.assertArrayEquals(trimedJobNameInByte,
+ reEncodedTrimedJobName.getBytes(UTF_8));
+ }
+
@Test
public void testUserNamePercentDecoding() throws IOException {
String jobHistoryFile = String.format(JOB_HISTORY_FILE_FORMATTER,