You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by za...@apache.org on 2019/03/05 23:05:05 UTC
[calcite] branch master updated: [CALCITE-2887] Improve performance
of RexLiteral#toJavaString
This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 6687023 [CALCITE-2887] Improve performance of RexLiteral#toJavaString
6687023 is described below
commit 6687023e9517fba00c5eeb85cc7b94efa0a6b9c0
Author: Stamatis Zampetakis <za...@gmail.com>
AuthorDate: Fri Mar 1 18:24:41 2019 +0100
[CALCITE-2887] Improve performance of RexLiteral#toJavaString
1. Replace combination of PrintWriter/StringWriter with StringBuilder.
2. Add JMH benchmark showing a 10X improvement when choosing StringBuilder.
3. Remove unused method from Unsafe.
4. Improve Javadoc of modified methods.
---
.../java/org/apache/calcite/rex/RexLiteral.java | 253 +++++++++++----------
.../main/java/org/apache/calcite/util/Unsafe.java | 6 -
.../main/java/org/apache/calcite/util/Util.java | 35 +--
.../benchmarks/StringConstructBenchmark.java | 172 ++++++++++++++
4 files changed, 323 insertions(+), 143 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rex/RexLiteral.java b/core/src/main/java/org/apache/calcite/rex/RexLiteral.java
index aa78730..192a962 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexLiteral.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexLiteral.java
@@ -35,14 +35,13 @@ import org.apache.calcite.util.Litmus;
import org.apache.calcite.util.NlsString;
import org.apache.calcite.util.TimeString;
import org.apache.calcite.util.TimestampString;
-import org.apache.calcite.util.Unsafe;
import org.apache.calcite.util.Util;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
+import java.io.IOException;
import java.io.PrintWriter;
-import java.io.StringWriter;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
@@ -382,21 +381,19 @@ public class RexLiteral extends RexNode {
if (value == null) {
return includeType == RexDigestIncludeType.NO_TYPE ? "null" : "null:" + fullTypeString;
}
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw);
- printAsJava(value, pw, typeName, false, includeType);
- pw.flush();
+ StringBuilder sb = new StringBuilder();
+ appendAsJava(value, sb, typeName, false, includeType);
if (includeType != RexDigestIncludeType.NO_TYPE) {
- sw.append(':');
+ sb.append(':');
if (!fullTypeString.endsWith("NOT NULL")) {
- sw.append(fullTypeString);
+ sb.append(fullTypeString);
} else {
// Trim " NOT NULL". Apparently, the literal is not null, so we just print the data type.
- Unsafe.append(sw, fullTypeString, 0, fullTypeString.length() - 9);
+ sb.append(fullTypeString, 0, fullTypeString.length() - 9);
}
}
- return sw.toString();
+ return sb.toString();
}
/**
@@ -551,14 +548,14 @@ public class RexLiteral extends RexNode {
* Prints the value this literal as a Java string constant.
*/
public void printAsJava(PrintWriter pw) {
- printAsJava(value, pw, typeName, true, RexDigestIncludeType.NO_TYPE);
+ appendAsJava(value, pw, typeName, true, RexDigestIncludeType.NO_TYPE);
}
/**
- * Prints a value as a Java string. The value must be consistent with the
- * type, as per {@link #valueMatchesType}.
+ * Appends the specified value in the provided destination as a Java string. The value must be
+ * consistent with the type, as per {@link #valueMatchesType}.
*
- * <p>Typical return values:
+ * <p>Typical return values:</p>
*
* <ul>
* <li>true</li>
@@ -567,122 +564,130 @@ public class RexLiteral extends RexNode {
* <li>1.25</li>
* <li>1234ABCD</li>
* </ul>
- * @param value Value
- * @param pw Writer to write to
- * @param typeName Type family
- * @param includeType if representation should include data type
+ *
+ * <p>The destination where the value is appended must not incur I/O operations. This method is
+ * not meant to be used for writing the values to permanent storage.</p>
+ *
+ * @param value a value to be appended to the provided destination as a Java string
+ * @param destination a destination where to append the specified value
+ * @param typeName a type name to be used for the transformation of the value to a Java string
+ * @param includeType an indicator whether to include the data type in the Java representation
+ * @throws IllegalStateException if the appending to the destination <code>Appendable</code> fails
+ * due to I/O
*/
- private static void printAsJava(
+ private static void appendAsJava(
Comparable value,
- PrintWriter pw,
+ Appendable destination,
SqlTypeName typeName,
boolean java, RexDigestIncludeType includeType) {
- switch (typeName) {
- case CHAR:
- NlsString nlsString = (NlsString) value;
- if (java) {
- Util.printJavaString(
- pw,
- nlsString.getValue(),
- true);
- } else {
- boolean includeCharset =
- (nlsString.getCharsetName() != null)
- && !nlsString.getCharsetName().equals(
- CalciteSystemProperty.DEFAULT_CHARSET.value());
- pw.print(nlsString.asSql(includeCharset, false));
- }
- break;
- case BOOLEAN:
- assert value instanceof Boolean;
- pw.print(((Boolean) value).booleanValue());
- break;
- case DECIMAL:
- assert value instanceof BigDecimal;
- pw.print(value.toString());
- break;
- case DOUBLE:
- assert value instanceof BigDecimal;
- pw.print(Util.toScientificNotation((BigDecimal) value));
- break;
- case BIGINT:
- assert value instanceof BigDecimal;
- pw.print(((BigDecimal) value).longValue());
- pw.print('L');
- break;
- case BINARY:
- assert value instanceof ByteString;
- pw.print("X'");
- pw.print(((ByteString) value).toString(16));
- pw.print("'");
- break;
- case NULL:
- assert value == null;
- pw.print("null");
- break;
- case SYMBOL:
- assert value instanceof Enum;
- pw.print("FLAG(");
- pw.print(value);
- pw.print(")");
- break;
- case DATE:
- assert value instanceof DateString;
- pw.print(value);
- break;
- case TIME:
- assert value instanceof TimeString;
- pw.print(value);
- break;
- case TIME_WITH_LOCAL_TIME_ZONE:
- assert value instanceof TimeString;
- pw.print(value);
- break;
- case TIMESTAMP:
- assert value instanceof TimestampString;
- pw.print(value);
- break;
- case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
- assert value instanceof TimestampString;
- pw.print(value);
- break;
- case INTERVAL_YEAR:
- case INTERVAL_YEAR_MONTH:
- case INTERVAL_MONTH:
- case INTERVAL_DAY:
- case INTERVAL_DAY_HOUR:
- case INTERVAL_DAY_MINUTE:
- case INTERVAL_DAY_SECOND:
- case INTERVAL_HOUR:
- case INTERVAL_HOUR_MINUTE:
- case INTERVAL_HOUR_SECOND:
- case INTERVAL_MINUTE:
- case INTERVAL_MINUTE_SECOND:
- case INTERVAL_SECOND:
- if (value instanceof BigDecimal) {
- pw.print(value.toString());
- } else {
+ try {
+ switch (typeName) {
+ case CHAR:
+ NlsString nlsString = (NlsString) value;
+ if (java) {
+ Util.printJavaString(
+ destination,
+ nlsString.getValue(),
+ true);
+ } else {
+ boolean includeCharset =
+ (nlsString.getCharsetName() != null)
+ && !nlsString.getCharsetName().equals(
+ CalciteSystemProperty.DEFAULT_CHARSET.value());
+ destination.append(nlsString.asSql(includeCharset, false));
+ }
+ break;
+ case BOOLEAN:
+ assert value instanceof Boolean;
+ destination.append(value.toString());
+ break;
+ case DECIMAL:
+ assert value instanceof BigDecimal;
+ destination.append(value.toString());
+ break;
+ case DOUBLE:
+ assert value instanceof BigDecimal;
+ destination.append(Util.toScientificNotation((BigDecimal) value));
+ break;
+ case BIGINT:
+ assert value instanceof BigDecimal;
+ long narrowLong = ((BigDecimal) value).longValue();
+ destination.append(String.valueOf(narrowLong));
+ destination.append('L');
+ break;
+ case BINARY:
+ assert value instanceof ByteString;
+ destination.append("X'");
+ destination.append(((ByteString) value).toString(16));
+ destination.append("'");
+ break;
+ case NULL:
assert value == null;
- pw.print("null");
- }
- break;
- case MULTISET:
- case ROW:
- @SuppressWarnings("unchecked") final List<RexLiteral> list = (List) value;
- pw.print(
- new AbstractList<String>() {
- public String get(int index) {
- return list.get(index).computeDigest(includeType);
- }
+ destination.append("null");
+ break;
+ case SYMBOL:
+ assert value instanceof Enum;
+ destination.append("FLAG(");
+ destination.append(value.toString());
+ destination.append(")");
+ break;
+ case DATE:
+ assert value instanceof DateString;
+ destination.append(value.toString());
+ break;
+ case TIME:
+ assert value instanceof TimeString;
+ destination.append(value.toString());
+ break;
+ case TIME_WITH_LOCAL_TIME_ZONE:
+ assert value instanceof TimeString;
+ destination.append(value.toString());
+ break;
+ case TIMESTAMP:
+ assert value instanceof TimestampString;
+ destination.append(value.toString());
+ break;
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ assert value instanceof TimestampString;
+ destination.append(value.toString());
+ break;
+ case INTERVAL_YEAR:
+ case INTERVAL_YEAR_MONTH:
+ case INTERVAL_MONTH:
+ case INTERVAL_DAY:
+ case INTERVAL_DAY_HOUR:
+ case INTERVAL_DAY_MINUTE:
+ case INTERVAL_DAY_SECOND:
+ case INTERVAL_HOUR:
+ case INTERVAL_HOUR_MINUTE:
+ case INTERVAL_HOUR_SECOND:
+ case INTERVAL_MINUTE:
+ case INTERVAL_MINUTE_SECOND:
+ case INTERVAL_SECOND:
+ assert value instanceof BigDecimal;
+ destination.append(value.toString());
+ break;
+ case MULTISET:
+ case ROW:
+ @SuppressWarnings("unchecked")
+ final List<RexLiteral> list = (List) value;
+ destination.append(
+ (new AbstractList<String>() {
+ public String get(int index) {
+ return list.get(index).computeDigest(includeType);
+ }
- public int size() {
- return list.size();
- }
- });
- break;
- default:
- assert valueMatchesType(value, typeName, true);
- throw Util.needToImplement(typeName);
+ public int size() {
+ return list.size();
+ }
+ }).toString());
+ break;
+ default:
+ assert valueMatchesType(value, typeName, true);
+ throw Util.needToImplement(typeName);
+ }
+ } catch (IOException e) {
+ throw new IllegalStateException("The destination Appendable should not incur I/O.", e);
}
}
diff --git a/core/src/main/java/org/apache/calcite/util/Unsafe.java b/core/src/main/java/org/apache/calcite/util/Unsafe.java
index 39cce2e..61805ab 100644
--- a/core/src/main/java/org/apache/calcite/util/Unsafe.java
+++ b/core/src/main/java/org/apache/calcite/util/Unsafe.java
@@ -49,12 +49,6 @@ public class Unsafe {
// Included in this class because StringBuffer is banned.
sw.getBuffer().setLength(0);
}
-
- /** Appends to {@link StringWriter}. */
- public static void append(StringWriter sw, CharSequence charSequence, int start, int end) {
- // Included in this class because StringBuffer is banned.
- sw.getBuffer().append(charSequence, start, end);
- }
}
// End Unsafe.java
diff --git a/core/src/main/java/org/apache/calcite/util/Util.java b/core/src/main/java/org/apache/calcite/util/Util.java
index 2544481..b4b3148 100644
--- a/core/src/main/java/org/apache/calcite/util/Util.java
+++ b/core/src/main/java/org/apache/calcite/util/Util.java
@@ -479,24 +479,33 @@ public class Util {
* Prints a string, enclosing in double quotes (") and escaping if
* necessary. For examples, <code>printDoubleQuoted(w,"x\"y",false)</code>
* prints <code>"x\"y"</code>.
+ *
+ * <p>The appendable where the value is printed must not incur I/O operations. This method is
+ * not meant to be used for writing the values to permanent storage.</p>
+ *
+ * @throws IllegalStateException if the print to the specified appendable fails due to I/O
*/
public static void printJavaString(
- PrintWriter pw,
+ Appendable appendable,
String s,
boolean nullMeansNull) {
- if (s == null) {
- if (nullMeansNull) {
- pw.print("null");
+ try {
+ if (s == null) {
+ if (nullMeansNull) {
+ appendable.append("null");
+ }
+ } else {
+ String s1 = replace(s, "\\", "\\\\");
+ String s2 = replace(s1, "\"", "\\\"");
+ String s3 = replace(s2, "\n\r", "\\n");
+ String s4 = replace(s3, "\n", "\\n");
+ String s5 = replace(s4, "\r", "\\r");
+ appendable.append('"');
+ appendable.append(s5);
+ appendable.append('"');
}
- } else {
- String s1 = replace(s, "\\", "\\\\");
- String s2 = replace(s1, "\"", "\\\"");
- String s3 = replace(s2, "\n\r", "\\n");
- String s4 = replace(s3, "\n", "\\n");
- String s5 = replace(s4, "\r", "\\r");
- pw.print("\"");
- pw.print(s5);
- pw.print("\"");
+ } catch (IOException ioe) {
+ throw new IllegalStateException("The specified appendable should not incur I/O.", ioe);
}
}
diff --git a/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java b/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java
new file mode 100644
index 0000000..c340dbb
--- /dev/null
+++ b/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.benchmarks;
+
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A benchmark of the most common patterns that are used to construct gradually String objects.
+ *
+ * The benchmark emphasizes on the build patterns that appear in the Calcite project.
+ */
+@Fork(value = 1, jvmArgsPrepend = "-Xmx2048m")
+@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+@Warmup(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+@Threads(1)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@BenchmarkMode(Mode.Throughput)
+public class StringConstructBenchmark {
+
+ /**
+ * A state holding a Writer object which is initialized only once at the beginning of the
+ * benchmark.
+ */
+ @State(Scope.Thread)
+ public static class WriterState {
+ public Writer writer;
+
+ @Setup(Level.Trial)
+ public void setup() {
+ this.writer = new StringWriter();
+ }
+ }
+
+ /**
+ * A state holding an Appendable object which is initialized after a fixed number of append
+ * operations.
+ */
+ @State(Scope.Thread)
+ public static class AppenderState {
+ /**
+ * The type of the appender to be initialised.
+ */
+ @Param({"StringBuilder", "StringWriter", "PrintWriter"})
+ public String appenderType;
+
+ /**
+ * The maximum number of appends before resetting the appender.
+ *
+ * If the value is small then the appender is reinitialized very often, making the instantiation
+ * of the appender the dominant operation of the benchmark.
+ */
+ @Param({"1", "256", "512", "1024"})
+ public int maxAppends;
+
+ /**
+ * The appender that is currently used.
+ */
+ private Appendable appender;
+
+ /**
+ * The number of append operations performed so far.
+ */
+ private int nAppends = 0;
+
+ @Setup(Level.Iteration)
+ public void setup() {
+ reset();
+ }
+
+ private void reset() {
+ nAppends = 0;
+ if (appenderType.equals("StringBuilder")) {
+ this.appender = new StringBuilder();
+ } else if (appenderType.equals("StringWriter")) {
+ this.appender = new StringWriter();
+ } else if (appenderType.equals("PrintWriter")) {
+ this.appender = new PrintWriter(new StringWriter());
+ } else {
+ throw new IllegalStateException(
+ "The specified appender type (" + appenderType + ") is not supported.");
+ }
+ }
+
+ Appendable getOrCreateAppender() {
+ if (nAppends >= maxAppends) {
+ reset();
+ }
+ nAppends++;
+ return appender;
+ }
+
+ }
+
+ @Benchmark
+ public StringBuilder initStringBuilder() {
+ return new StringBuilder();
+ }
+
+ @Benchmark
+ public StringWriter initStringWriter() {
+ return new StringWriter();
+ }
+
+ @Benchmark
+ public PrintWriter initPrintWriter(WriterState writerState) {
+ return new PrintWriter(writerState.writer);
+ }
+
+ /**
+ * Benchmarks the performance of instantiating different {@link Appendable} objects and appending
+ * the same string a fixed number of times.
+ *
+ * @param bh blackhole used as an optimization fence
+ * @param appenderState the state holds the type of the appender and the number of appends that
+ * need to be performed before resetting the appender
+ * @throws IOException if the append operation encounters an I/O problem
+ */
+ @Benchmark
+ public void appendString(Blackhole bh, AppenderState appenderState) throws IOException {
+ bh.consume(appenderState.getOrCreateAppender().append("placeholder"));
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(StringConstructBenchmark.class.getName())
+ .detectJvmArgs()
+ .build();
+
+ new Runner(opt).run();
+ }
+}
+
+// End StringConstructBenchmark.java