You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by GitBox <gi...@apache.org> on 2020/06/09 16:24:44 UTC

[GitHub] [calcite] danny0405 opened a new pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

danny0405 opened a new pull request #2016:
URL: https://github.com/apache/calcite/pull/2016


   …ls) for RexNode and RelNode
   
   * Add class Digest used to identify the node;
   * There is a pre-computed hash code to speed up #hashCode and #equals;
   * We still use the string digest comparison for RexNode, which can be
   promoted to reusable objects later


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442105545



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       The test here https://github.com/apache/calcite/pull/2034 shows that `ArrayList.toArray` has a negligible effect on performance, so I would still prefer not to follow your suggestion.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440571543



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       Considering null checks are often optimized out by the JVM, I'm not sure that the possible extra optimization is worth the extra code you wrote...




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r437505908



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents));
+  }
+
+  private static Object[] collect(RelNode rel, List<Pair<String, Object>> contents) {

Review comment:
       This is used to resolve hash code conflicts, which are expected to be rare, so the cache would not bring much benefit.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] vlsi commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
vlsi commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r437420817



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents));
+  }
+
+  private static Object[] collect(RelNode rel, List<Pair<String, Object>> contents) {

Review comment:
       This seems to be resource-consuming. Shouldn't the result be cached in the constructor?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440573361



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       Why not use {{#computeDigest}} with the vertex id then?
   
   Also, it is not possible to "subtract" from the default digest; one can only add. The patch you proposed has some behavior which is not possible to change.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439909893



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();

Review comment:
       Well, I have fixed the consistency of `#equals`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442607736



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>Currently, returns composition of class name and id.
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return rel.getRelTypeName() + '#' + rel.getId();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+

Review comment:
       As you said, "List<Pair<String, Object>> items are the properties, e.g. the inputs, the type, the traits and so on", so why do we still have to append the traits?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439897847



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       Did you see the code in `Arrays.equals(thisItems, thatItems)`? Here, `thisItems` and `thatItems` are both never null.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442613841



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed field nullability
+   * (most of them come from rex simplification or constant reduction).
+   * This is expected to be a rare case, so the hash code is computed without the row type,
+   * but when hash codes conflict, we compare with the row type involved (sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>Currently, returns composition of class name and id.
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return rel.getRelTypeName() + '#' + rel.getId();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  /**
+   * Orders digests by the id of the rel they were created for; digests that
+   * are {@link #equals equal} always compare as 0, whatever their rel ids are.
+   *
+   * <p>The ids are compared with {@link Integer#compare} instead of being
+   * subtracted, so the sign of the result cannot be flipped by integer overflow.
+   */
+  @Override public int compareTo(@Nonnull Digest other) {
+    return this.equals(other) ? 0 : Integer.compare(this.rel.getId(), other.rel.getId());
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * Compares the full item lists of both digests, row type included.
+   *
+   * <p>This method resolves hash conflicts: {@link #equals} only calls it when the
+   * pre-computed hash codes already match. In the current 6000+ tests about 8 tests
+   * hit a conflict, so the items are collected on demand here instead of being
+   * cached, in order to reduce memory consumption.
+   *
+   * @param other The digest to compare with
+   * @return true if both digests collect the same items in the same order
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    // Arrays.equals performs the same length check and element-wise
+    // Objects.equals comparison as the hand-written loop it replaces.
+    return java.util.Arrays.equals(thisItems, thatItems);
+  }
+
+  @Override public int hashCode() {
+    return hashCode;
+  }
+
+  /**
+   * Creates a digest with given rel and properties.
+   */
+  public static Digest create(RelNode rel, List<Pair<String, Object>> contents) {
+    return new Digest(rel, contents);
+  }
+
+  /**
+   * Creates a digest with given rel.
+   */
+  public static Digest create(RelNode rel) {
+    return new Digest(rel);
+  }
+
+  /**
+   * Creates a digest with the given rel and string-format digest.
+   */
+  public static Digest create(RelNode rel, String digest) {
+    return new Digest(rel, digest);
+  }
+
+  /**
+   * Instantiates a digest whose string form is the simple rel digest; this digest
+   * should only be used as an initial value.
+   */
+  public static Digest initial(RelNode rel) {
+    return new Digest(rel, simpleRelDigest(rel));
+  }
+}

Review comment:
       It's really hard to distinguish these two methods. Is it possible to remove `Digest#create` which seems useless?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439693614



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed field nullability
+   * (most of them come from rex simplification or constant reduction).
+   * This is expected to be a rare case, so the hash code is computed without the row type,
+   * but when hash codes conflict, we compare with the row type involved (sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();

Review comment:
       why is Comparable needed for debugging? Right now, I don't see how debuggability can justify breaking the contract




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442609677



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed field nullability
+   * (most of them come from rex simplification or constant reduction).
+   * This is expected to be a rare case, so the hash code is computed without the row type,
+   * but when hash codes conflict, we compare with the row type involved (sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>Currently, returns composition of class name and id.
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return rel.getRelTypeName() + '#' + rel.getId();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  /**
+   * Orders digests by the id of the rel they were created for; digests that
+   * are {@link #equals equal} always compare as 0, whatever their rel ids are.
+   *
+   * <p>The ids are compared with {@link Integer#compare} instead of being
+   * subtracted, so the sign of the result cannot be flipped by integer overflow.
+   */
+  @Override public int compareTo(@Nonnull Digest other) {
+    return this.equals(other) ? 0 : Integer.compare(this.rel.getId(), other.rel.getId());
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * Compares the full item lists of both digests, row type included.
+   *
+   * <p>This method resolves hash conflicts: {@link #equals} only calls it when the
+   * pre-computed hash codes already match. In the current 6000+ tests about 8 tests
+   * hit a conflict, so the items are collected on demand here instead of being
+   * cached, in order to reduce memory consumption.
+   *
+   * @param other The digest to compare with
+   * @return true if both digests collect the same items in the same order
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    // Arrays.equals performs the same length check and element-wise
+    // Objects.equals comparison as the hand-written loop it replaces.
+    return java.util.Arrays.equals(thisItems, thatItems);
+  }
+
+  @Override public int hashCode() {
+    return hashCode;
+  }
+
+  /**
+   * Creates a digest with given rel and properties.
+   */
+  public static Digest create(RelNode rel, List<Pair<String, Object>> contents) {
+    return new Digest(rel, contents);
+  }
+
+  /**
+   * Creates a digest with given rel.
+   */
+  public static Digest create(RelNode rel) {
+    return new Digest(rel);
+  }
+
+  /**
+   * Creates a digest with the given rel and string-format digest.
+   */
+  public static Digest create(RelNode rel, String digest) {
+    return new Digest(rel, digest);
+  }
+
+  /**
+   * Instantiates a digest whose string form is the simple rel digest; this digest
+   * should only be used as an initial value.
+   */
+  public static Digest initial(RelNode rel) {
+    return new Digest(rel, simpleRelDigest(rel));
+  }
+}

Review comment:
       To make it clear that the digest is just an initial value, which should only be used once, in the constructor.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] vlsi commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
vlsi commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r438730172



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents));
+  }
+
+  private static Object[] collect(RelNode rel, List<Pair<String, Object>> contents) {

Review comment:
       `equals` is always executed (== in case `hashCode` matches), and equals would be called multiple times in case of a hash collision, so it is worth making both equals and hashCode fast.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439693715



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       Are you saying that 9 lines are simpler than one line of code, namely `return Arrays.equals(thisItems, thatItems)`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439507991



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {

Review comment:
       Should `Arrays.deepEquals` be used in case `thisItems[i]` is an array?

##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();

Review comment:
       This breaks the contract that if 2 objects are equal, then the method should return 0.

##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       The whole check + loop could be replaced with `Arrays.equals(Object[], Object[])`

##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       Why not let the `rel` node itself decide how to create its digest, instead of having a non-extensible, non-overridable method that switches behavior based on the type of `rel`? This looks like an anti-pattern.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440573361



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       Why not use `#computeDigest` with the vertex id then?
   
   Also, it is not possible to "subtract" from the default digest; one can only add to it. The patch you proposed has some behavior that cannot be changed.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#issuecomment-644520413


   Regarding my concern about memory usage, I was talking about `Digest` and the extra list used to store `items`, which is kind of redundant with the `RelNode` itself. An alternative could be to replace the list of extra fields with an `Iterable` and call it each time items need to be collected. It could also possibly eliminate the creation of an array to do a comparison between objects...


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442610070



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>Currently, returns composition of class name and id.
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return rel.getRelTypeName() + '#' + rel.getId();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+

Review comment:
       The properties do not include traits, the Digest does.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442605829



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest for the given rel whose string form is the simple digest
+   * returned by {@link #simpleRelDigest(RelNode)}.
+   *
+   * @param rel The rel
+   */
+  private Digest(RelNode rel) {
+    // simpleRelDigest returns a String, so this delegates to Digest(RelNode, String).
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /**
+   * Creates a digest backed by a ready-made string.
+   *
+   * <p>No items are stored; the hash code is the hash code of the string.
+   *
+   * @param rel    The rel
+   * @param digest The string form of the digest, must not be null
+   */
+  private Digest(RelNode rel, String digest) {
+    this.hashCode = digest.hashCode();
+    this.digest = digest;
+    this.items = Collections.emptyList();
+    this.rel = rel;
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>The result is the rel type name, a {@code '#'} and the rel id, concatenated.
+   *
+   * @param rel The rel
+   * @return the simple digest string
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return new StringBuilder()
+        .append(rel.getRelTypeName())
+        .append('#')
+        .append(rel.getId())
+        .toString();
+  }
+
+  /**
+   * Returns the string form of this digest.
+   *
+   * <p>If a string is already stored it is returned as is. Otherwise the text
+   * {@code TypeName.trait1.trait2(name1=value1,name2=value2)} is built from the rel type
+   * name, the rel's traits and the items, stored in {@code digest} and returned.
+   */
+  @Override public String toString() {
+    if (digest == null) {
+      final StringBuilder buf = new StringBuilder();
+      buf.append(rel.getRelTypeName());
+      for (RelTrait trait : rel.getTraitSet()) {
+        buf.append('.').append(trait.toString());
+      }
+      buf.append('(');
+      // Empty before the first item, a comma before every following one.
+      String separator = "";
+      for (Pair<String, Object> item : items) {
+        buf.append(separator).append(item.left).append('=').append(item.right);
+        separator = ",";
+      }
+      digest = buf.append(')').toString();
+    }
+    return digest;
+  }
+
+  /**
+   * Compares two digests: equal digests compare as 0, otherwise the order is that of
+   * the ids of the rels they describe.
+   *
+   * <p>Uses {@link Integer#compare(int, int)} rather than subtracting the ids, so the
+   * sign of the result cannot flip through int overflow when the ids are far apart.
+   *
+   * @param other The digest to compare with
+   * @return 0 if the digests are equal, otherwise the result of comparing the rel ids
+   */
+  @Override public int compareTo(@Nonnull Digest other) {
+    if (this.equals(other)) {
+      return 0;
+    }
+    return Integer.compare(this.rel.getId(), other.rel.getId());
+  }
+
+  /**
+   * Returns whether {@code o} is a digest of exactly the same class whose stored hash
+   * code matches this one and for which {@code deepEquals} also succeeds.
+   */
+  @Override public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    }
+    if (o == null || o.getClass() != getClass()) {
+      return false;
+    }
+    final Digest other = (Digest) o;
+    // Compare the pre-computed hash codes first; the item-by-item comparison
+    // only runs when they agree.
+    if (hashCode != other.hashCode) {
+      return false;
+    }
+    return deepEquals(other);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /** Returns the hash code that was computed when this digest was constructed. */
+  @Override public int hashCode() {
+    return this.hashCode;
+  }
+
+  /**
+   * Factory for a digest described by a rel and its property list.
+   *
+   * @param rel      The rel
+   * @param contents The properties of the rel as name/value pairs
+   * @return a new digest over {@code rel} and {@code contents}
+   */
+  public static Digest create(RelNode rel, List<Pair<String, Object>> contents) {
+    final Digest result = new Digest(rel, contents);
+    return result;
+  }
+
+  /**
+   * Factory for a digest described by the rel alone.
+   *
+   * @param rel The rel
+   * @return a new digest built by the single-argument constructor
+   */
+  public static Digest create(RelNode rel) {
+    final Digest result = new Digest(rel);
+    return result;
+  }
+
+  /**
+   * Factory for a digest described by a rel and a ready-made string format digest.
+   *
+   * @param rel    The rel
+   * @param digest The string form of the digest
+   * @return a new digest whose string form is {@code digest}
+   */
+  public static Digest create(RelNode rel, String digest) {
+    final Digest result = new Digest(rel, digest);
+    return result;
+  }
+
+  /**
+   * Instantiates a digest with a fixed string format digest, namely the simple digest
+   * of the rel. This digest should only be used as an initial value.
+   *
+   * @param rel The rel
+   * @return a new digest whose string form is the simple digest of {@code rel}
+   */
+  public static Digest initial(RelNode rel) {
+    final String simple = simpleRelDigest(rel);
+    return new Digest(rel, simple);
+  }
+}

Review comment:
       What's the difference between `Digest#create` and `Digest#initial` ? Their results seem the same.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442112660



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       Using the digest string to compute the hashcode works well when the string is short; you can use the benchmark https://github.com/apache/calcite/pull/2034 to see the difference. The performance difference is negligible.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440675634



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       It is often, but not always, the case. If you are not sure either, let's keep it as it is; the current code is also straightforward.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r438734086



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents));
+  }
+
+  private static Object[] collect(RelNode rel, List<Pair<String, Object>> contents) {

Review comment:
       I ran all the test cases and only 8 tests have a hash conflict; I will try to improve it.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439897636



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       > RelNode subclass in my project which has a very natural/efficient way of creating a digest, I would have no choice than to fork the Calcite project to modify that method
   Did you notice that `#computeDigest` and `#explainTerms` are both APIs for overriding the digest?
   
   `HepRelVertex` is not a public API, so I don't think there is any need to customize its digest. If you have a subclass, overriding `#computeDigest` is still feasible.

##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       > RelNode subclass in my project which has a very natural/efficient way of creating a digest, I would have no choice than to fork the Calcite project to modify that method
   
   Did you notice that `#computeDigest` and `#explainTerms` are both APIs for overriding the digest?
   
   `HepRelVertex` is not a public API, so I don't think there is any need to customize its digest; if you have a sub-class, overriding `#computeDigest` is still feasible.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r441027147



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       I guess I missed some update to your change (or missed the second constructor), but it looks like the choice is either:
   - provide rel + a list of extra items, and the logic to create the hashcode + digest is totally controlled by the digest class
   - provide rel + digest string to skip the whole hash computation logic
   
   I guess nothing in-between...
   
   > The digest of HepRelVertex and RelSubSet are the same as before, what do you mean by using the #computeDigest with the vertex id then ?
   This goes back to my comment about having the logic hardcoded into a single private static method, which goes against OOP practices, whereas `HepRelVertex` and `RelSubset` could themselves instruct the digest that the only field to consider should be `id`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442609980



##########
File path: core/src/main/java/org/apache/calcite/rel/core/Window.java
##########
@@ -394,16 +394,24 @@ public RexWinAggCall(
       this.ignoreNulls = ignoreNulls;
     }
 
-    /** {@inheritDoc}
-     *
-     * <p>Override {@link RexCall}, defining equality based on identity.
-     */
-    @Override public boolean equals(Object obj) {
-      return this == obj;
+    @Override public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (o == null || getClass() != o.getClass()) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+      RexWinAggCall that = (RexWinAggCall) o;
+      return ordinal == that.ordinal
+          && distinct == that.distinct
+          && ignoreNulls == that.ignoreNulls;
     }
 
     @Override public int hashCode() {
-      return Objects.hash(digest, ordinal, distinct);
+      return Objects.hash(super.hashCode(), ordinal, distinct, ignoreNulls);
     }

Review comment:
       Nice catch.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442616115



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>Currently, returns composition of class name and id.
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return rel.getRelTypeName() + '#' + rel.getId();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(@Nonnull Digest other) {
+    return this.equals(other) ? 0 : this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  @Override public int hashCode() {
+    return hashCode;
+  }
+
+  /**
+   * Creates a digest with given rel and properties.
+   */
+  public static Digest create(RelNode rel, List<Pair<String, Object>> contents) {
+    return new Digest(rel, contents);
+  }
+
+  /**
+   * Creates a digest with given rel.
+   */
+  public static Digest create(RelNode rel) {
+    return new Digest(rel);
+  }
+
+  /**
+   * Creates a digest with given rel and string format digest
+   */
+  public static Digest create(RelNode rel, String digest) {
+    return new Digest(rel, digest);
+  }
+
+  /**
+   * Instantiates a digest with solid string format digest, this digest should only
+   * be used as a initial.
+   */
+  public static Digest initial(RelNode rel) {
+    return new Digest(rel, simpleRelDigest(rel));
+  }
+}

Review comment:
       I can remove `#initial` so that we use `#create` uniformly.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442598141



##########
File path: core/src/main/java/org/apache/calcite/plan/hep/HepRelVertex.java
##########
@@ -75,8 +76,8 @@
     return currentRel.getRowType();
   }
 
-  @Override protected String computeDigest() {
-    return "HepRelVertex(" + currentRel + ")";
+  @Override protected Digest computeDigest() {
+    return Digest.create(this, getRelTypeName() + '#' + getCurrentRel().getId());
   }
 

Review comment:
       Why do you use `getRelTypeName()` rather than "HepRelVertex"?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 edited a comment on pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 edited a comment on pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#issuecomment-645923910


   > Regarding my concern about memory usage, I was talking about `Digest` and the extra list used to store `items` which is kind of redundant with the `RelNode` itself. An alternative could have to replace the list of extra fields with an `Iterable` and call it each time items need to be collected? It could also possibly eliminate the creation of an array to do a comparison between objects...
   
   What do you mean by extra fields? If possible, can you provide some POC code? A discussion built on all kinds of assumptions is burning me out.
   
   And if possible, use the code here https://github.com/apache/calcite/pull/2034 to illustrate that you are right.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440678152



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {

Review comment:
       I said that outputting false does not affect the semantics, so what is the problem there? I do not want to make the logic a mess just for an unverified assumption.
   
   If you have a strong opinion, please give specific cases; an assumption alone makes no sense to me.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442610541



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,256 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import javax.annotation.Nonnull;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  private final int hashCode;
+  private final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>Currently, returns composition of class name and id.
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    return rel.getRelTypeName() + '#' + rel.getId();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+

Review comment:
       Please correct the comment.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442608886



##########
File path: core/src/main/java/org/apache/calcite/plan/hep/HepRelVertex.java
##########
@@ -75,8 +76,8 @@
     return currentRel.getRowType();
   }
 
-  @Override protected String computeDigest() {
-    return "HepRelVertex(" + currentRel + ")";
+  @Override protected Digest computeDigest() {
+    return Digest.create(this, getRelTypeName() + '#' + getCurrentRel().getId());
   }
 

Review comment:
       To unify the logic, we can switch to "HepRelVertex", that's a preference.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442609306



##########
File path: core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
##########
@@ -96,11 +96,11 @@
           planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE);
         })
         .explainContains(""
-            + "EnumerableCalc(expr#0..3=[{inputs}], empid=[$t1], name=[$t3])\n"
+            + "PLAN=EnumerableCalc(expr#0..3=[{inputs}], empid=[$t1], name=[$t3])\n"
             + "  EnumerableCorrelate(correlation=[$cor1], joinType=[inner], requiredColumns=[{0}])\n"
             + "    EnumerableAggregate(group=[{0}])\n"
             + "      EnumerableTableScan(table=[[s, depts]])\n"

Review comment:
       I think we could, thanks ~




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439692622



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       Sorry, I would choose the current code because it's simpler.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440999147



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();

Review comment:
       Maybe it would have been simpler not to implement `Comparable`? You didn't clarify why this is needed for debugging purposes...




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442113348



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();

Review comment:
       That was initially proposed by Julian in the issue; I think it is needed when we want to sort on it.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 merged pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 merged pull request #2016:
URL: https://github.com/apache/calcite/pull/2016


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r441022602



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {

Review comment:
       Yes, there are some "ifs" there. There are 2 assumptions here as well:
   - there's no rel node using an array as a digest field, which is true for Calcite code, and probably true for code using the library although it's harder to verify
   - even so, returning false when the contents of 2 arrays are identical has no impact on the semantics.
   
   For the first point, maybe a more opinionated choice could be to disallow arrays as digest items?
   For the second point, I'm not sure that the assumption holds: if the planner cannot identify that 2 rel nodes are identical (and so collapse them), it might cause some kind of infinite loop/plan explosion where the same node is created over and over.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442111462



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {

Review comment:
       1. I don't think we should have any limitation on what can be a digest item just because of the digest code implementation.
   2. Did we encounter any case that has an infinite loop just because of an array item there? I need to see the case to make a decision, not an assumption. I prefer to fix that when we really encounter it; I think such a case would be very rare.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439694355



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       Considering that method, the digest takes care of how the digest should be generated for nodes it already knows about. Based on the current approach, if I create a new RelNode subclass in my project which has a very natural/efficient way of creating a digest, I would have no choice other than to fork the Calcite project to modify that method.
   I would favor an approach which would let `HepRelVertex` itself decide what to use for its digest instead of hardcoding in this method that the current rel should be chosen.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#issuecomment-645923910


   > Regarding my concern about memory usage, I was talking about `Digest` and the extra list used to store `items` which is kind of redundant with the `RelNode` itself. An alternative could have to replace the list of extra fields with an `Iterable` and call it each time items need to be collected? It could also possibly eliminate the creation of an array to do a comparison between objects...
   
   What do you mean by extra fields? If possible, can you give some POC code there? The discussion with all kinds of assumptions makes me burn out.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442598141



##########
File path: core/src/main/java/org/apache/calcite/plan/hep/HepRelVertex.java
##########
@@ -75,8 +76,8 @@
     return currentRel.getRowType();
   }
 
-  @Override protected String computeDigest() {
-    return "HepRelVertex(" + currentRel + ")";
+  @Override protected Digest computeDigest() {
+    return Digest.create(this, getRelTypeName() + '#' + getCurrentRel().getId());
   }
 

Review comment:
       Why do you use `getRelTypeName()` rather than "HepRelVertex"?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440680110



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       The current patch also supports a pure string digest, which is the same as before; if you think it is impossible, please give an example.
   
   The digests of `HepRelVertex` and `RelSubset` are the same as before; what do you mean by `using the #computeDigest with the vertex id then`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439693708



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {

Review comment:
       I checked all the rels; none of them has array attributes in the digest. Even if one did, `#equals` would return `false`, which does not affect the semantics. So I chose the current implementation.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439280300



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents));
+  }
+
+  private static Object[] collect(RelNode rel, List<Pair<String, Object>> contents) {

Review comment:
       In the end I decided to keep it as is, because conflicts are very rare. I also removed the row type from the hash code computation, because a row type difference between equivalent nodes is also a rare case (usually caused by nullability changes).




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439692310



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {

Review comment:
       In the current implementation, the digest **is** handled by each `rel` node. Splitting the code across many places would be hard to maintain, so `Digest` takes care of how the digest is generated, while the invoker is still each node.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r439692488



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();

Review comment:
       This is only for debugging. Strictly speaking, I have no idea how to compare two nodes that are semantically equivalent (#equals) but have different members.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] chunweilei commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
chunweilei commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r442601203



##########
File path: core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCorrelateTest.java
##########
@@ -96,11 +96,11 @@
           planner.removeRule(EnumerableRules.ENUMERABLE_MERGE_JOIN_RULE);
         })
         .explainContains(""
-            + "EnumerableCalc(expr#0..3=[{inputs}], empid=[$t1], name=[$t3])\n"
+            + "PLAN=EnumerableCalc(expr#0..3=[{inputs}], empid=[$t1], name=[$t3])\n"
             + "  EnumerableCorrelate(correlation=[$cor1], joinType=[inner], requiredColumns=[{0}])\n"
             + "    EnumerableAggregate(group=[{0}])\n"
             + "      EnumerableTableScan(table=[[s, depts]])\n"

Review comment:
       It's weird to add `PLAN=` just for these two cases. It would be better to have the same format for all the cases.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#issuecomment-643544569


   > I have to say that I don't see how this pull request makes hash code more efficient (precomputation of hashcode?). For sure it looks to increase memory usage and also remove lots of flexibility for Calcite users extending on RelNode and RexNode...
   
   Compared to the old code, this patch reduces memory usage, especially for `RexNode`. What do you mean by removing lots of flexibility for Calcite users? Which flexibility did I remove?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440997075



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {

Review comment:
       Sorry, I am at a loss for arguments. The original reply was that the code was simpler than `Arrays.equals(Object[], Object[])`, a standard JDK method, whereas in fact, it is basically the same code, except for the check for null, which is often suppressed by the JVM compiler (cf https://shipilev.net/jvm/anatomy-quarks/25-implicit-null-checks/) and even so has minimal impact compared to the two calls to the `#collect` method where a list is created and converted later into an array...




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] danny0405 commented on pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
danny0405 commented on pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#issuecomment-644621579


   > Regarding my concern about memory usage, I was talking about `Digest` and the extra list used to store `items` which is kind of redundant with the `RelNode` itself. An alternative could be to replace the list of extra fields with an `Iterable` and call it each time items need to be collected? It could also possibly eliminate the creation of an array to do a comparison between objects...
   
   I have given the explanation in the JIRA issue for why I keep redundant references to the RelNode items; that still makes sense to me.
   
   >An alternative could have to replace the list of extra fields with an `Iterable`
   
   It is possible. If you think there is something else that can be done to improve the `Digest` code, go for it; I would very much appreciate that.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [calcite] laurentgo commented on a change in pull request #2016: [CALCITE-3786] Add Digest interface to enable efficient hashCode(equa…

Posted by GitBox <gi...@apache.org>.
laurentgo commented on a change in pull request #2016:
URL: https://github.com/apache/calcite/pull/2016#discussion_r440571622



##########
File path: core/src/main/java/org/apache/calcite/plan/Digest.java
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.plan;
+
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.hint.Hintable;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.util.Pair;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A short description of relational expression's type, inputs, and
+ * other properties. The digest uniquely identifies the node; another node
+ * is equivalent if and only if it has the same value.
+ *
+ * <p>Row type is part of the digest for the rare occasion that similar
+ * expressions have different types, e.g. variants of
+ * {@code Project(child=rel#1, a=null)} where a is a null INTEGER or a
+ * null VARCHAR(10). Row type is represented as fieldTypes only, so {@code RelNode}
+ * that differ with field names only are treated equal.
+ * For instance, {@code Project(input=rel#1,empid=$0)} and {@code Project(input=rel#1,deptno=$0)}
+ * are equal.
+ *
+ * <p>Computed by {@code org.apache.calcite.rel.AbstractRelNode#computeDigest},
+ * assigned by {@link org.apache.calcite.rel.AbstractRelNode#onRegister},
+ * returned by {@link org.apache.calcite.rel.AbstractRelNode#getDigest()}.
+ */
+public class Digest implements Comparable<Digest> {
+
+  //~ Instance fields --------------------------------------------------------
+
+  final int hashCode;
+  final List<Pair<String, Object>> items;
+  private final RelNode rel;
+
+  // Used for debugging, computed lazily.
+  private String digest = null;
+
+  //~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a digest with given rel and properties.
+   *
+   * @param rel   The rel
+   * @param items The properties, e.g. the inputs, the type, the traits and so on
+   */
+  private Digest(RelNode rel, List<Pair<String, Object>> items) {
+    this.rel = rel;
+    this.items = normalizeContents(items);
+    this.hashCode = computeIdentity(rel, this.items);
+  }
+
+  /**
+   * Creates a digest with given rel, the digest is computed as simple,
+   * see {@link #simpleRelDigest(RelNode)}.
+   */
+  private Digest(RelNode rel) {
+    this(rel, simpleRelDigest(rel));
+  }
+
+  /** Creates a digest with given rel and string format digest. */
+  private Digest(RelNode rel, String digest) {
+    this.rel = rel;
+    this.items = Collections.emptyList();
+    this.digest = digest;
+    this.hashCode = this.digest.hashCode();
+  }
+
+  /** Returns the identity of this digest which is used to speedup hashCode and equals. */
+  private static int computeIdentity(RelNode rel, List<Pair<String, Object>> contents) {
+    return Objects.hash(collect(rel, contents, false));
+  }
+
+  /**
+   * Collects the items used for {@link #hashCode} and {@link #equals}.
+   *
+   * <p>Generally, the items used for hashCode and equals should be the same. The exception
+   * is the row type of the relational expression: the row type is needed because during
+   * planning, new equivalent rels may be produced with changed fields nullability
+   * (i.e. most of them comes from the rex simplify or constant reduction).
+   * This expects to be rare case, so the hashcode is computed without row type
+   * but when it conflicts, we compare with the row type involved(sans field names).
+   *
+   * @param rel      The rel to compute digest
+   * @param contents The rel properties should be considered in digest
+   * @param withType Whether to involve the row type
+   */
+  private static Object[] collect(
+      RelNode rel,
+      List<Pair<String, Object>> contents,
+      boolean withType) {
+    List<Object> hashCodeItems = new ArrayList<>();
+    // The type name.
+    hashCodeItems.add(rel.getRelTypeName());
+    // The traits.
+    hashCodeItems.addAll(rel.getTraitSet());
+    // The hints.
+    if (rel instanceof Hintable) {
+      hashCodeItems.addAll(((Hintable) rel).getHints());
+    }
+    if (withType) {
+      // The row type sans field names.
+      RelDataType relType = rel.getRowType();
+      if (relType.isStruct()) {
+        hashCodeItems.addAll(Pair.right(relType.getFieldList()));
+      } else {
+        // Make a decision here because
+        // some downstream projects have custom rel type which has no explicit fields.
+        hashCodeItems.add(relType);
+      }
+    }
+    // The rel node contents(e.g. the inputs or exprs).
+    hashCodeItems.addAll(contents);
+    return hashCodeItems.toArray();
+  }
+
+  /** Normalizes the rel node properties, currently, just to replace the
+   * {@link RelNode} with a simple string format digest. **/
+  private static List<Pair<String, Object>> normalizeContents(
+      List<Pair<String, Object>> items) {
+    List<Pair<String, Object>> normalized = new ArrayList<>();
+    for (Pair<String, Object> item : items) {
+      if (item.right instanceof RelNode) {
+        RelNode input = (RelNode) item.right;
+        normalized.add(Pair.of(item.left, simpleRelDigest(input)));
+      } else {
+        normalized.add(item);
+      }
+    }
+    return normalized;
+  }
+
+  /**
+   * Returns a simple string format digest.
+   *
+   * <p>There are three kinds of nodes we need to handle:
+   *
+   * <ul>
+   * <li>RelSubset: composition of class name, set id and traits;</li>
+   * <li>HepRelVertex: composition of class name and current rel id;</li>
+   * <li>Normal rel: composition of class name and id.</li>
+   * </ul>
+   *
+   * @param rel The rel
+   */
+  private static String simpleRelDigest(RelNode rel) {
+    StringBuilder digest = new StringBuilder(rel.getRelTypeName());
+    digest.append('#');
+    if (rel instanceof RelSubset) {
+      RelSubset subset = (RelSubset) rel;
+      digest.append(subset.getSetId());
+      for (RelTrait trait : subset.getTraitSet()) {
+        digest.append('.').append(trait);
+      }
+    } else if (rel instanceof HepRelVertex) {
+      digest.append(((HepRelVertex) rel).getCurrentRel().getId());
+    } else {
+      digest.append(rel.getId());
+    }
+    return digest.toString();
+  }
+
+  @Override public String toString() {
+    if (null != digest) {
+      return digest;
+    }
+    StringBuilder sb = new StringBuilder();
+    sb.append(rel.getRelTypeName());
+
+    for (RelTrait trait : rel.getTraitSet()) {
+      sb.append('.');
+      sb.append(trait.toString());
+    }
+
+    sb.append('(');
+    int j = 0;
+    for (Pair<String, Object> item : items) {
+      if (j++ > 0) {
+        sb.append(',');
+      }
+      sb.append(item.left);
+      sb.append('=');
+      sb.append(item.right);
+    }
+    sb.append(')');
+    digest = sb.toString();
+    return digest;
+  }
+
+  @Override public int compareTo(Digest other) {
+    return this.rel.getId() - other.rel.getId();
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    Digest that = (Digest) o;
+    return hashCode == that.hashCode && deepEquals(that);
+  }
+
+  /**
+   * The method is used to resolve hash conflict, in current 6000+ tests, there are about 8
+   * tests with conflict, so we do not cache the hash code items in order to
+   * reduce mem consumption.
+   */
+  private boolean deepEquals(Digest other) {
+    Object[] thisItems = collect(this.rel, this.items, true);
+    Object[] thatItems = collect(other.rel, other.items, true);
+    if (thisItems.length != thatItems.length) {
+      return false;
+    }
+    for (int i = 0; i < thisItems.length; i++) {
+      if (!Objects.equals(thisItems[i], thatItems[i])) {

Review comment:
       for now? what about external rels?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org