You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by za...@apache.org on 2019/03/05 23:05:05 UTC

[calcite] branch master updated: [CALCITE-2887] Improve performance of RexLiteral#toJavaString

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 6687023  [CALCITE-2887] Improve performance of RexLiteral#toJavaString
6687023 is described below

commit 6687023e9517fba00c5eeb85cc7b94efa0a6b9c0
Author: Stamatis Zampetakis <za...@gmail.com>
AuthorDate: Fri Mar 1 18:24:41 2019 +0100

    [CALCITE-2887] Improve performance of RexLiteral#toJavaString
    
    1. Replace combination of PrintWriter/StringWriter with StringBuilder.
    2. Add JMH benchmark showing a 10X improvement when choosing StringBuilder.
    3. Remove unused method from Unsafe.
    4. Improve Javadoc of modified methods.
---
 .../java/org/apache/calcite/rex/RexLiteral.java    | 253 +++++++++++----------
 .../main/java/org/apache/calcite/util/Unsafe.java  |   6 -
 .../main/java/org/apache/calcite/util/Util.java    |  35 +--
 .../benchmarks/StringConstructBenchmark.java       | 172 ++++++++++++++
 4 files changed, 323 insertions(+), 143 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rex/RexLiteral.java b/core/src/main/java/org/apache/calcite/rex/RexLiteral.java
index aa78730..192a962 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexLiteral.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexLiteral.java
@@ -35,14 +35,13 @@ import org.apache.calcite.util.Litmus;
 import org.apache.calcite.util.NlsString;
 import org.apache.calcite.util.TimeString;
 import org.apache.calcite.util.TimestampString;
-import org.apache.calcite.util.Unsafe;
 import org.apache.calcite.util.Util;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 
+import java.io.IOException;
 import java.io.PrintWriter;
-import java.io.StringWriter;
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
@@ -382,21 +381,19 @@ public class RexLiteral extends RexNode {
     if (value == null) {
       return includeType == RexDigestIncludeType.NO_TYPE ? "null" : "null:" + fullTypeString;
     }
-    StringWriter sw = new StringWriter();
-    PrintWriter pw = new PrintWriter(sw);
-    printAsJava(value, pw, typeName, false, includeType);
-    pw.flush();
+    StringBuilder sb = new StringBuilder();
+    appendAsJava(value, sb, typeName, false, includeType);
 
     if (includeType != RexDigestIncludeType.NO_TYPE) {
-      sw.append(':');
+      sb.append(':');
       if (!fullTypeString.endsWith("NOT NULL")) {
-        sw.append(fullTypeString);
+        sb.append(fullTypeString);
       } else {
         // Trim " NOT NULL". Apparently, the literal is not null, so we just print the data type.
-        Unsafe.append(sw, fullTypeString, 0, fullTypeString.length() - 9);
+        sb.append(fullTypeString, 0, fullTypeString.length() - 9);
       }
     }
-    return sw.toString();
+    return sb.toString();
   }
 
   /**
@@ -551,14 +548,14 @@ public class RexLiteral extends RexNode {
    * Prints the value this literal as a Java string constant.
    */
   public void printAsJava(PrintWriter pw) {
-    printAsJava(value, pw, typeName, true, RexDigestIncludeType.NO_TYPE);
+    appendAsJava(value, pw, typeName, true, RexDigestIncludeType.NO_TYPE);
   }
 
   /**
-   * Prints a value as a Java string. The value must be consistent with the
-   * type, as per {@link #valueMatchesType}.
+   * Appends the specified value in the provided destination as a Java string. The value must be
+   * consistent with the type, as per {@link #valueMatchesType}.
    *
-   * <p>Typical return values:
+   * <p>Typical return values:</p>
    *
    * <ul>
    * <li>true</li>
@@ -567,122 +564,130 @@ public class RexLiteral extends RexNode {
    * <li>1.25</li>
    * <li>1234ABCD</li>
    * </ul>
-   *  @param value    Value
-   * @param pw       Writer to write to
-   * @param typeName Type family
-   * @param includeType if representation should include data type
+   *
+   * <p>The destination where the value is appended must not incur I/O operations. This method is
+   * not meant to be used for writing the values to permanent storage.</p>
+   *
+   * @param value a value to be appended to the provided destination as a Java string
+   * @param destination a destination where to append the specified value
+   * @param typeName a type name to be used for the transformation of the value to a Java string
+   * @param includeType an indicator whether to include the data type in the Java representation
+   * @throws IllegalStateException if the appending to the destination <code>Appendable</code> fails
+   *         due to I/O
    */
-  private static void printAsJava(
+  private static void appendAsJava(
       Comparable value,
-      PrintWriter pw,
+      Appendable destination,
       SqlTypeName typeName,
       boolean java, RexDigestIncludeType includeType) {
-    switch (typeName) {
-    case CHAR:
-      NlsString nlsString = (NlsString) value;
-      if (java) {
-        Util.printJavaString(
-            pw,
-            nlsString.getValue(),
-            true);
-      } else {
-        boolean includeCharset =
-            (nlsString.getCharsetName() != null)
-                && !nlsString.getCharsetName().equals(
-                CalciteSystemProperty.DEFAULT_CHARSET.value());
-        pw.print(nlsString.asSql(includeCharset, false));
-      }
-      break;
-    case BOOLEAN:
-      assert value instanceof Boolean;
-      pw.print(((Boolean) value).booleanValue());
-      break;
-    case DECIMAL:
-      assert value instanceof BigDecimal;
-      pw.print(value.toString());
-      break;
-    case DOUBLE:
-      assert value instanceof BigDecimal;
-      pw.print(Util.toScientificNotation((BigDecimal) value));
-      break;
-    case BIGINT:
-      assert value instanceof BigDecimal;
-      pw.print(((BigDecimal) value).longValue());
-      pw.print('L');
-      break;
-    case BINARY:
-      assert value instanceof ByteString;
-      pw.print("X'");
-      pw.print(((ByteString) value).toString(16));
-      pw.print("'");
-      break;
-    case NULL:
-      assert value == null;
-      pw.print("null");
-      break;
-    case SYMBOL:
-      assert value instanceof Enum;
-      pw.print("FLAG(");
-      pw.print(value);
-      pw.print(")");
-      break;
-    case DATE:
-      assert value instanceof DateString;
-      pw.print(value);
-      break;
-    case TIME:
-      assert value instanceof TimeString;
-      pw.print(value);
-      break;
-    case TIME_WITH_LOCAL_TIME_ZONE:
-      assert value instanceof TimeString;
-      pw.print(value);
-      break;
-    case TIMESTAMP:
-      assert value instanceof TimestampString;
-      pw.print(value);
-      break;
-    case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
-      assert value instanceof TimestampString;
-      pw.print(value);
-      break;
-    case INTERVAL_YEAR:
-    case INTERVAL_YEAR_MONTH:
-    case INTERVAL_MONTH:
-    case INTERVAL_DAY:
-    case INTERVAL_DAY_HOUR:
-    case INTERVAL_DAY_MINUTE:
-    case INTERVAL_DAY_SECOND:
-    case INTERVAL_HOUR:
-    case INTERVAL_HOUR_MINUTE:
-    case INTERVAL_HOUR_SECOND:
-    case INTERVAL_MINUTE:
-    case INTERVAL_MINUTE_SECOND:
-    case INTERVAL_SECOND:
-      if (value instanceof BigDecimal) {
-        pw.print(value.toString());
-      } else {
+    try {
+      switch (typeName) {
+      case CHAR:
+        NlsString nlsString = (NlsString) value;
+        if (java) {
+          Util.printJavaString(
+              destination,
+              nlsString.getValue(),
+              true);
+        } else {
+          boolean includeCharset =
+              (nlsString.getCharsetName() != null)
+                  && !nlsString.getCharsetName().equals(
+                  CalciteSystemProperty.DEFAULT_CHARSET.value());
+          destination.append(nlsString.asSql(includeCharset, false));
+        }
+        break;
+      case BOOLEAN:
+        assert value instanceof Boolean;
+        destination.append(value.toString());
+        break;
+      case DECIMAL:
+        assert value instanceof BigDecimal;
+        destination.append(value.toString());
+        break;
+      case DOUBLE:
+        assert value instanceof BigDecimal;
+        destination.append(Util.toScientificNotation((BigDecimal) value));
+        break;
+      case BIGINT:
+        assert value instanceof BigDecimal;
+        long narrowLong = ((BigDecimal) value).longValue();
+        destination.append(String.valueOf(narrowLong));
+        destination.append('L');
+        break;
+      case BINARY:
+        assert value instanceof ByteString;
+        destination.append("X'");
+        destination.append(((ByteString) value).toString(16));
+        destination.append("'");
+        break;
+      case NULL:
         assert value == null;
-        pw.print("null");
-      }
-      break;
-    case MULTISET:
-    case ROW:
-      @SuppressWarnings("unchecked") final List<RexLiteral> list = (List) value;
-      pw.print(
-          new AbstractList<String>() {
-            public String get(int index) {
-              return list.get(index).computeDigest(includeType);
-            }
+        destination.append("null");
+        break;
+      case SYMBOL:
+        assert value instanceof Enum;
+        destination.append("FLAG(");
+        destination.append(value.toString());
+        destination.append(")");
+        break;
+      case DATE:
+        assert value instanceof DateString;
+        destination.append(value.toString());
+        break;
+      case TIME:
+        assert value instanceof TimeString;
+        destination.append(value.toString());
+        break;
+      case TIME_WITH_LOCAL_TIME_ZONE:
+        assert value instanceof TimeString;
+        destination.append(value.toString());
+        break;
+      case TIMESTAMP:
+        assert value instanceof TimestampString;
+        destination.append(value.toString());
+        break;
+      case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+        assert value instanceof TimestampString;
+        destination.append(value.toString());
+        break;
+      case INTERVAL_YEAR:
+      case INTERVAL_YEAR_MONTH:
+      case INTERVAL_MONTH:
+      case INTERVAL_DAY:
+      case INTERVAL_DAY_HOUR:
+      case INTERVAL_DAY_MINUTE:
+      case INTERVAL_DAY_SECOND:
+      case INTERVAL_HOUR:
+      case INTERVAL_HOUR_MINUTE:
+      case INTERVAL_HOUR_SECOND:
+      case INTERVAL_MINUTE:
+      case INTERVAL_MINUTE_SECOND:
+      case INTERVAL_SECOND:
+        assert value instanceof BigDecimal;
+        destination.append(value.toString());
+        break;
+      case MULTISET:
+      case ROW:
+        @SuppressWarnings("unchecked")
+        final List<RexLiteral> list = (List) value;
+        destination.append(
+            (new AbstractList<String>() {
+              public String get(int index) {
+                return list.get(index).computeDigest(includeType);
+              }
 
-            public int size() {
-              return list.size();
-            }
-          });
-      break;
-    default:
-      assert valueMatchesType(value, typeName, true);
-      throw Util.needToImplement(typeName);
+              public int size() {
+                return list.size();
+              }
+            }).toString());
+        break;
+      default:
+        assert valueMatchesType(value, typeName, true);
+        throw Util.needToImplement(typeName);
+      }
+    } catch (IOException e) {
+      throw new IllegalStateException("The destination Appendable should not incur I/O.", e);
     }
   }
 
diff --git a/core/src/main/java/org/apache/calcite/util/Unsafe.java b/core/src/main/java/org/apache/calcite/util/Unsafe.java
index 39cce2e..61805ab 100644
--- a/core/src/main/java/org/apache/calcite/util/Unsafe.java
+++ b/core/src/main/java/org/apache/calcite/util/Unsafe.java
@@ -49,12 +49,6 @@ public class Unsafe {
     // Included in this class because StringBuffer is banned.
     sw.getBuffer().setLength(0);
   }
-
-  /** Appends to {@link StringWriter}. */
-  public static void append(StringWriter sw, CharSequence charSequence, int start, int end) {
-    // Included in this class because StringBuffer is banned.
-    sw.getBuffer().append(charSequence, start, end);
-  }
 }
 
 // End Unsafe.java
diff --git a/core/src/main/java/org/apache/calcite/util/Util.java b/core/src/main/java/org/apache/calcite/util/Util.java
index 2544481..b4b3148 100644
--- a/core/src/main/java/org/apache/calcite/util/Util.java
+++ b/core/src/main/java/org/apache/calcite/util/Util.java
@@ -479,24 +479,33 @@ public class Util {
    * Prints a string, enclosing in double quotes (") and escaping if
    * necessary. For examples, <code>printDoubleQuoted(w,"x\"y",false)</code>
    * prints <code>"x\"y"</code>.
+   *
+   * <p>The appendable where the value is printed must not incur I/O operations. This method is
+   * not meant to be used for writing the values to permanent storage.</p>
+   *
+   * @throws IllegalStateException if the print to the specified appendable fails due to I/O
    */
   public static void printJavaString(
-      PrintWriter pw,
+      Appendable appendable,
       String s,
       boolean nullMeansNull) {
-    if (s == null) {
-      if (nullMeansNull) {
-        pw.print("null");
+    try {
+      if (s == null) {
+        if (nullMeansNull) {
+          appendable.append("null");
+        }
+      } else {
+        String s1 = replace(s, "\\", "\\\\");
+        String s2 = replace(s1, "\"", "\\\"");
+        String s3 = replace(s2, "\n\r", "\\n");
+        String s4 = replace(s3, "\n", "\\n");
+        String s5 = replace(s4, "\r", "\\r");
+        appendable.append('"');
+        appendable.append(s5);
+        appendable.append('"');
       }
-    } else {
-      String s1 = replace(s, "\\", "\\\\");
-      String s2 = replace(s1, "\"", "\\\"");
-      String s3 = replace(s2, "\n\r", "\\n");
-      String s4 = replace(s3, "\n", "\\n");
-      String s5 = replace(s4, "\r", "\\r");
-      pw.print("\"");
-      pw.print(s5);
-      pw.print("\"");
+    } catch (IOException ioe) {
+      throw new IllegalStateException("The specified appendable should not incur I/O.", ioe);
     }
   }
 
diff --git a/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java b/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java
new file mode 100644
index 0000000..c340dbb
--- /dev/null
+++ b/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.benchmarks;
+
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A benchmark of the most common patterns that are used to construct gradually String objects.
+ *
+ * The benchmark emphasizes on the build patterns that appear in the Calcite project.
+ */
+@Fork(value = 1, jvmArgsPrepend = "-Xmx2048m")
+@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+@Warmup(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
+@Threads(1)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@BenchmarkMode(Mode.Throughput)
+public class StringConstructBenchmark {
+
+  /**
+   * A state holding a Writer object which is initialized only once at the beginning of the
+   * benchmark.
+   */
+  @State(Scope.Thread)
+  public static class WriterState {
+    public Writer writer;
+
+    @Setup(Level.Trial)
+    public void setup() {
+      this.writer = new StringWriter();
+    }
+  }
+
+  /**
+   * A state holding an Appendable object which is initialized after a fixed number of append
+   * operations.
+   */
+  @State(Scope.Thread)
+  public static class AppenderState {
+    /**
+     * The type of the appender to be initialised.
+     */
+    @Param({"StringBuilder", "StringWriter", "PrintWriter"})
+    public String appenderType;
+
+    /**
+     * The maximum number of appends before resetting the appender.
+     *
+     * If the value is small then the appender is reinitialized very often, making the instantiation
+     * of the appender the dominant operation of the benchmark.
+     */
+    @Param({"1", "256", "512", "1024"})
+    public int maxAppends;
+
+    /**
+     * The appender that is currently used.
+     */
+    private Appendable appender;
+
+    /**
+     * The number of append operations performed so far.
+     */
+    private int nAppends = 0;
+
+    @Setup(Level.Iteration)
+    public void setup() {
+      reset();
+    }
+
+    private void reset() {
+      nAppends = 0;
+      if (appenderType.equals("StringBuilder")) {
+        this.appender = new StringBuilder();
+      } else if (appenderType.equals("StringWriter")) {
+        this.appender = new StringWriter();
+      } else if (appenderType.equals("PrintWriter")) {
+        this.appender = new PrintWriter(new StringWriter());
+      } else {
+        throw new IllegalStateException(
+            "The specified appender type (" + appenderType + ") is not supported.");
+      }
+    }
+
+    Appendable getOrCreateAppender() {
+      if (nAppends >= maxAppends) {
+        reset();
+      }
+      nAppends++;
+      return appender;
+    }
+
+  }
+
+  @Benchmark
+  public StringBuilder initStringBuilder() {
+    return new StringBuilder();
+  }
+
+  @Benchmark
+  public StringWriter initStringWriter() {
+    return new StringWriter();
+  }
+
+  @Benchmark
+  public PrintWriter initPrintWriter(WriterState writerState) {
+    return new PrintWriter(writerState.writer);
+  }
+
+  /**
+   * Benchmarks the performance of instantiating different {@link Appendable} objects and appending
+   * the same string a fixed number of times.
+   *
+   * @param bh blackhole used as an optimization fence
+   * @param appenderState the state holds the type of the appender and the number of appends that
+   * need to be performed before resetting the appender
+   * @throws IOException if the append operation encounters an I/O problem
+   */
+  @Benchmark
+  public void appendString(Blackhole bh, AppenderState appenderState) throws IOException {
+    bh.consume(appenderState.getOrCreateAppender().append("placeholder"));
+  }
+
+  public static void main(String[] args) throws RunnerException {
+    Options opt = new OptionsBuilder()
+        .include(StringConstructBenchmark.class.getName())
+        .detectJvmArgs()
+        .build();
+
+    new Runner(opt).run();
+  }
+}
+
+// End StringConstructBenchmark.java