You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by rm...@apache.org on 2014/06/26 11:47:11 UTC
[46/53] [abbrv] git commit: Change string construction to work around a known JVM performance bug still present in some older JVM versions.
Change string construction to work around a known JVM performance bug still present in some older JVM versions.
Project: http://git-wip-us.apache.org/repos/asf/incubator-flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-flink/commit/ef623e9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-flink/tree/ef623e9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-flink/diff/ef623e9b
Branch: refs/heads/travis_test
Commit: ef623e9b883dea3f57d0fb33c3af3a804dc041e6
Parents: 8431395
Author: Stephan Ewen <se...@apache.org>
Authored: Wed Jun 25 16:12:33 2014 +0200
Committer: Stephan Ewen <se...@apache.org>
Committed: Wed Jun 25 16:14:18 2014 +0200
----------------------------------------------------------------------
.../api/common/io/DelimitedInputFormat.java | 2 +-
.../types/parser/AsciiStringParser.java | 9 ++----
.../api/java/io/TextInputFormat.java | 29 +++++++-------------
3 files changed, 13 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
index 88eee94..185f7f8 100644
--- a/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
+++ b/stratosphere-core/src/main/java/eu/stratosphere/api/common/io/DelimitedInputFormat.java
@@ -258,7 +258,7 @@ public abstract class DelimitedInputFormat<OT> extends FileInputFormat<OT> {
*
* @return returns whether the record was successfully deserialized or not.
*/
- public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes);
+ public abstract OT readRecord(OT reuse, byte[] bytes, int offset, int numBytes) throws IOException;
// --------------------------------------------------------------------------------------------
// Pre-flight: Configuration, Splits, Sampling
http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
----------------------------------------------------------------------
diff --git a/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
index 381d968..c3e6784 100644
--- a/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
+++ b/stratosphere-core/src/main/java/eu/stratosphere/types/parser/AsciiStringParser.java
@@ -13,17 +13,12 @@
package eu.stratosphere.types.parser;
-import java.nio.charset.Charset;
-
/**
* Converts a variable length field of a byte array into a {@link String}. The byte contents between
* delimiters is interpreted as an ASCII string. The string may be quoted in double quotes. For quoted
* strings, whitespaces (space and tab) leading and trailing before and after the quotes are removed.
*/
public class AsciiStringParser extends FieldParser<String> {
-
- // the default (ascii style) charset. should be available really everywhere.
- private static final Charset CHARSET = Charset.forName("ISO-8859-1");
private static final byte WHITESPACE_SPACE = (byte) ' ';
private static final byte WHITESPACE_TAB = (byte) '\t';
@@ -58,7 +53,7 @@ public class AsciiStringParser extends FieldParser<String> {
if (i < limit) {
// end of the string
- this.result = new String(bytes, quoteStart, i-quoteStart, CHARSET);
+ this.result = new String(bytes, quoteStart, i-quoteStart);
i++; // the quote
@@ -87,7 +82,7 @@ public class AsciiStringParser extends FieldParser<String> {
}
// set from the beginning. unquoted strings include the leading whitespaces
- this.result = new String(bytes, startPos, i-startPos, CHARSET);
+ this.result = new String(bytes, startPos, i-startPos);
return (i == limit ? limit : i+1);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/ef623e9b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
index 6febb74..759c9e9 100644
--- a/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
+++ b/stratosphere-java/src/main/java/eu/stratosphere/api/java/io/TextInputFormat.java
@@ -14,6 +14,7 @@
**********************************************************************************************************************/
package eu.stratosphere.api.java.io;
+import java.io.IOException;
import java.nio.charset.Charset;
import eu.stratosphere.api.common.io.DelimitedInputFormat;
@@ -25,12 +26,6 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
private static final long serialVersionUID = 1L;
- private String charsetName = "UTF-8";
-
-// private boolean skipInvalidLines;
-
- private transient Charset charset;
-
/**
* Code of \r, used to remove \r from a line when the line ends with \r\n
*/
@@ -40,7 +35,12 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
* Code of \n, used to identify if \n is used as delimiter
*/
private static final byte NEW_LINE = (byte) '\n';
-
+
+
+ /**
+ * The name of the charset to use for decoding.
+ */
+ private String charsetName = "UTF-8";
// --------------------------------------------------------------------------------------------
@@ -62,14 +62,6 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
this.charsetName = charsetName;
}
-// public boolean isSkipInvalidLines() {
-// return skipInvalidLines;
-// }
-//
-// public void setSkipInvalidLines(boolean skipInvalidLines) {
-// this.skipInvalidLines = skipInvalidLines;
-// }
-
// --------------------------------------------------------------------------------------------
@Override
@@ -79,13 +71,12 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
if (charsetName == null || !Charset.isSupported(charsetName)) {
throw new RuntimeException("Unsupported charset: " + charsetName);
}
- this.charset = Charset.forName(charsetName);
}
// --------------------------------------------------------------------------------------------
@Override
- public String readRecord(String reusable, byte[] bytes, int offset, int numBytes) {
+ public String readRecord(String reusable, byte[] bytes, int offset, int numBytes) throws IOException {
//Check if \n is used as delimiter and the end of this line is a \r, then remove \r from the line
if (this.getDelimiter() != null && this.getDelimiter().length == 1
&& this.getDelimiter()[0] == NEW_LINE && offset+numBytes >= 1
@@ -93,13 +84,13 @@ public class TextInputFormat extends DelimitedInputFormat<String> {
numBytes -= 1;
}
- return new String(bytes, offset, numBytes, this.charset);
+ return new String(bytes, offset, numBytes, this.charsetName);
}
// --------------------------------------------------------------------------------------------
@Override
public String toString() {
- return "TextInputFormat (" + getFilePath() + ") - " + this.charsetName; // + (this.skipInvalidLines ? "(skipping invalid lines)" : "");
+ return "TextInputFormat (" + getFilePath() + ") - " + this.charsetName;
}
}