You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/13 06:50:57 UTC

[1/3] impala git commit: IMPALA-6489: use correct template tuple size

Repository: impala
Updated Branches:
  refs/heads/master 95f166630 -> 5f7599687


IMPALA-6489: use correct template tuple size

The bug was that we mixed up the size of the top-level tuple with the
size of the nested tuple. The upshot in this case was that the wrong
amount of data was memcpy()ed over and we read past the bounds of the
original allocation.

Testing:
TestParquetArrayEncodings.test_avro_primitive_in_list reliably
reproduced the problem under ASAN. After the fix it not longer
reproduces.

Change-Id: I8193c04673f15e5057f457cc8a3a91a8fef64be2
Reviewed-on: http://gerrit.cloudera.org:8080/9288
Reviewed-by: Alex Behm <al...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f4d7e264
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f4d7e264
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f4d7e264

Branch: refs/heads/master
Commit: f4d7e2644a7c3ab7450bc4fd7dfd274803fbf8dd
Parents: 95f1666
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Feb 12 14:23:17 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 13 03:13:47 2018 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scanner.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f4d7e264/be/src/exec/hdfs-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h
index e3c186f..6497457 100644
--- a/be/src/exec/hdfs-scanner.h
+++ b/be/src/exec/hdfs-scanner.h
@@ -428,7 +428,7 @@ class HdfsScanner {
   void IR_ALWAYS_INLINE InitTuple(
       const TupleDescriptor* desc, Tuple* template_tuple, Tuple* tuple) {
     if (has_template_tuple(template_tuple)) {
-      InitTupleFromTemplate(template_tuple, tuple, tuple_byte_size());
+      InitTupleFromTemplate(template_tuple, tuple, tuple_byte_size(*desc));
     } else {
       tuple->ClearNullBits(desc->null_bytes_offset(), desc->num_null_bytes());
     }
@@ -479,6 +479,9 @@ class HdfsScanner {
 
   /// Not inlined in IR so it can be replaced with a constant.
   int IR_NO_INLINE tuple_byte_size() const { return tuple_byte_size_; }
+  int IR_NO_INLINE tuple_byte_size(const TupleDescriptor& desc) const {
+    return desc.byte_size();
+  }
 
   /// Returns true iff 'template_tuple' is non-NULL.
   /// Not inlined in IR so it can be replaced with a constant.


[2/3] impala git commit: IMPALA-5440: Add planner tests with extreme statistics values

Posted by ta...@apache.org.
IMPALA-5440: Add planner tests with extreme statistics values

This commit address some of the issues in JIRA: tests against the cardinality
overflowing from JOIN, UNION, CROSS JOIN, FULL OUTER JOIN,
0 row number and negative row number, as well as cardinality on Subplan node.

Change-Id: I86dec47cf1438882cafaec53e97864ccfcdff3cb
Reviewed-on: http://gerrit.cloudera.org:8080/9065
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/955fca68
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/955fca68
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/955fca68

Branch: refs/heads/master
Commit: 955fca68d53499455fceb5d7d308957eb6687a87
Parents: f4d7e26
Author: Xinran Yu Tinney <xy...@cloudera.com>
Authored: Thu Jan 18 15:27:31 2018 -0600
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 13 04:26:34 2018 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/PlannerTest.java  | 77 ++++++++++++++++++++
 .../apache/impala/planner/PlannerTestBase.java  | 40 ++++++++++
 2 files changed, 117 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/955fca68/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 760334d..5dbba75 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -504,4 +504,81 @@ public class PlannerTest extends PlannerTestBase {
     options.setExplain_level(TExplainLevel.EXTENDED);
     runPlannerTestFile("min-max-runtime-filters", options);
   }
+
+  @Test
+  public void testCardinalityOverflow() throws ImpalaException {
+    String tblName = "tpch.cardinality_overflow";
+    String colDefs = "("
+        + "l_orderkey BIGINT, "
+        + "l_partkey BIGINT, "
+        + "l_suppkey BIGINT, "
+        + "l_linenumber INT, "
+        + "l_shipmode STRING, "
+        + "l_comment STRING"
+        + ")";
+    String tblLocation = "LOCATION "
+        + "'hdfs://localhost:20500/test-warehouse/tpch.lineitem'";
+    String tblPropsTemplate = "TBLPROPERTIES('numRows'='%s')";
+    String tblProps = String.format(tblPropsTemplate, Long.toString(Long.MAX_VALUE));
+
+    addTestTable(String.format("CREATE EXTERNAL TABLE %s %s %s %s;",
+        tblName, colDefs, tblLocation, tblProps));
+
+    // CROSS JOIN query: tests that multiplying the input cardinalities does not overflow
+    // the cross-join's estimated cardinality
+    String query = "select * from tpch.cardinality_overflow a,"
+        + "tpch.cardinality_overflow b, tpch.cardinality_overflow c";
+    checkCardinality(query, 0, Long.MAX_VALUE);
+
+    // FULL OUTER JOIN query: tests that adding the input cardinalities does not overflow
+    // the full outer join's estimated cardinality
+    query = "select a.l_comment from tpch.cardinality_overflow a full outer join "
+        + "tpch.cardinality_overflow b on a.l_orderkey = b.l_partkey";
+    checkCardinality(query, 0, Long.MAX_VALUE);
+
+    // UNION query: tests that adding the input cardinalities does not overflow
+    // the union's estimated cardinality
+    query = "select l_shipmode from tpch.cardinality_overflow "
+        + "union select l_comment from tpch.cardinality_overflow";
+    checkCardinality(query, 0, Long.MAX_VALUE);
+
+    // JOIN query: tests that multiplying the input cardinalities does not overflow
+    // the join's estimated cardinality
+    query = "select a.l_comment from tpch.cardinality_overflow a inner join "
+        + "tpch.cardinality_overflow b on a.l_linenumber < b.l_orderkey";
+    checkCardinality(query, 0, Long.MAX_VALUE);
+
+    // creates an empty table and tests that the cardinality is 0
+    tblName = "tpch.ex_customer_cardinality_zero";
+    tblProps = String.format(tblPropsTemplate, 0);
+    addTestTable(String.format("CREATE EXTERNAL TABLE  %s %s %s %s;",
+        tblName, colDefs, tblLocation, tblProps));
+    query = "select * from tpch.ex_customer_cardinality_zero";
+    checkCardinality(query, 0, 0);
+
+    // creates a table with negative row count and
+    // tests that the cardinality is not negative
+    tblName = "tpch.ex_customer_cardinality_neg";
+    tblProps = String.format(tblPropsTemplate, -1);
+    addTestTable(String.format("CREATE EXTERNAL TABLE  %s %s %s %s;",
+        tblName, colDefs, tblLocation, tblProps));
+    query = "select * from tpch.ex_customer_cardinality_neg";
+    checkCardinality(query, -1, Long.MAX_VALUE);
+
+    // SUBPLAN query: tests that adding the input cardinalities does not overflow
+    // the SUBPLAN's estimated cardinality
+    tblName = "functional_parquet.cardinality_overflow";
+    colDefs = "("
+        + "id BIGINT, "
+        + "int_array ARRAY<INT>"
+        + ")";
+    String storedAs = "STORED AS PARQUET";
+    tblLocation = "LOCATION "
+        + "'hdfs://localhost:20500/test-warehouse/complextypestbl_parquet'";
+    tblProps = String.format(tblPropsTemplate, Long.toString(Long.MAX_VALUE));
+    addTestTable(String.format("CREATE EXTERNAL TABLE  %s %s %s %s %s;",
+        tblName, colDefs, storedAs, tblLocation, tblProps));
+    query = "select id from functional_parquet.cardinality_overflow t, t.int_array";
+    checkCardinality(query, 0, Long.MAX_VALUE);
+  }
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/955fca68/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index bc93160..7c47d74 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -36,6 +36,7 @@ import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.impala.analysis.ColumnLineageGraph;
 import org.apache.impala.analysis.DescriptorTable;
+import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.common.ImpalaException;
@@ -665,6 +666,45 @@ public class PlannerTestBase extends FrontendTestBase {
     }
   }
 
+  /**
+   * This function plans the given query and fails if the estimated cardinalities are
+   * not within the specified bounds [min, max].
+   */
+  protected void checkCardinality(String query, long min, long max)
+        throws ImpalaException {
+    TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB,
+        System.getProperty("user.name"));
+    queryCtx.client_request.setStmt(query);
+    StringBuilder explainBuilder = new StringBuilder();
+    TExecRequest execRequest = frontend_.createExecRequest(queryCtx, explainBuilder);
+
+    if (!execRequest.isSetQuery_exec_request()
+        || execRequest.query_exec_request == null
+        || execRequest.query_exec_request.plan_exec_info == null) {
+      return;
+    }
+    for (TPlanExecInfo execInfo : execRequest.query_exec_request.plan_exec_info) {
+      for (TPlanFragment planFragment : execInfo.fragments) {
+        if (!planFragment.isSetPlan() || planFragment.plan == null) continue;
+        for (TPlanNode node : planFragment.plan.nodes) {
+          if (node.estimated_stats == null) {
+            fail("Query: " + query + " has no estimated statistics");
+          }
+          long cardinality = node.estimated_stats.cardinality;
+          if (cardinality < min || cardinality > max) {
+            StringBuilder errorLog = new StringBuilder();
+            errorLog.append("Query: " + query + "\n");
+            errorLog.append(
+                "Expected cardinality estimate between " + min + " and " + max + "\n");
+            errorLog.append("Actual cardinality estimate: " + cardinality + "\n");
+            errorLog.append("In node id " + node.node_id + "\n");
+            fail(errorLog.toString());
+          }
+        }
+      }
+    }
+  }
+
   private void checkColumnLineage(TestCase testCase, TExecRequest execRequest,
       StringBuilder errorLog, StringBuilder actualOutput) {
     String query = testCase.getQuery();


[3/3] impala git commit: IMPALA-6269: Cherry-pick dependency change for KRPC

Posted by ta...@apache.org.
IMPALA-6269: Cherry-pick dependency change for KRPC

Expose RPC method info map and various metrics

These changes are needed for IMPALA-6269.

Change-Id: I8bda390ea92cceb0d696767402c978a83b386825
Reviewed-on: http://gerrit.cloudera.org:8080/9269
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Todd Lipcon <to...@apache.org>
Reviewed-on: http://gerrit.cloudera.org:8080/9287
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5f759968
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5f759968
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5f759968

Branch: refs/heads/master
Commit: 5f7599687748b1e1ce0a5a34d38cc49a4c4cd9f5
Parents: 955fca6
Author: Lars Volker <lv...@cloudera.com>
Authored: Mon Feb 12 14:46:19 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 13 05:41:18 2018 +0000

----------------------------------------------------------------------
 be/src/kudu/rpc/acceptor_pool.cc | 4 ++++
 be/src/kudu/rpc/acceptor_pool.h  | 4 ++++
 be/src/kudu/rpc/service_if.h     | 6 +++++-
 be/src/kudu/util/metrics.h       | 4 ++++
 4 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5f759968/be/src/kudu/rpc/acceptor_pool.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/acceptor_pool.cc b/be/src/kudu/rpc/acceptor_pool.cc
index cbb1232..deff77b 100644
--- a/be/src/kudu/rpc/acceptor_pool.cc
+++ b/be/src/kudu/rpc/acceptor_pool.cc
@@ -134,6 +134,10 @@ Status AcceptorPool::GetBoundAddress(Sockaddr* addr) const {
   return socket_.GetSocketAddress(addr);
 }
 
+int64_t AcceptorPool::num_rpc_connections_accepted() const {
+  return rpc_connections_accepted_->value();
+}
+
 void AcceptorPool::RunThread() {
   while (true) {
     Socket new_sock;

http://git-wip-us.apache.org/repos/asf/impala/blob/5f759968/be/src/kudu/rpc/acceptor_pool.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/acceptor_pool.h b/be/src/kudu/rpc/acceptor_pool.h
index 92b7fc5..653f135 100644
--- a/be/src/kudu/rpc/acceptor_pool.h
+++ b/be/src/kudu/rpc/acceptor_pool.h
@@ -18,6 +18,7 @@
 #ifndef KUDU_RPC_ACCEPTOR_POOL_H
 #define KUDU_RPC_ACCEPTOR_POOL_H
 
+#include <stdint.h>
 #include <vector>
 
 #include "kudu/gutil/atomicops.h"
@@ -59,6 +60,9 @@ class AcceptorPool {
   // actual port that was bound.
   Status GetBoundAddress(Sockaddr* addr) const;
 
+  // Return the number of connections accepted by this messenger. Thread-safe.
+  int64_t num_rpc_connections_accepted() const;
+
  private:
   void RunThread();
 

http://git-wip-us.apache.org/repos/asf/impala/blob/5f759968/be/src/kudu/rpc/service_if.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/service_if.h b/be/src/kudu/rpc/service_if.h
index a3722c6..7846c0c 100644
--- a/be/src/kudu/rpc/service_if.h
+++ b/be/src/kudu/rpc/service_if.h
@@ -121,12 +121,16 @@ class GeneratedServiceIf : public ServiceIf {
 
   RpcMethodInfo* LookupMethod(const RemoteMethod& method) override;
 
+  // Returns the mapping from method names to method infos.
+  typedef std::unordered_map<std::string, scoped_refptr<RpcMethodInfo>> MethodInfoMap;
+  const MethodInfoMap& methods_by_name() const { return methods_by_name_; }
+
  protected:
   // For each method, stores the relevant information about how to handle the
   // call. Methods are inserted by the constructor of the generated subclass.
   // After construction, this map is accessed by multiple threads and therefore
   // must not be modified.
-  std::unordered_map<std::string, scoped_refptr<RpcMethodInfo>> methods_by_name_;
+  MethodInfoMap methods_by_name_;
 
   // The result tracker for this service's methods.
   scoped_refptr<ResultTracker> result_tracker_;

http://git-wip-us.apache.org/repos/asf/impala/blob/5f759968/be/src/kudu/util/metrics.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/util/metrics.h b/be/src/kudu/util/metrics.h
index 8aeaf93..0db08ee 100644
--- a/be/src/kudu/util/metrics.h
+++ b/be/src/kudu/util/metrics.h
@@ -987,6 +987,10 @@ class Histogram : public Metric {
   Status GetHistogramSnapshotPB(HistogramSnapshotPB* snapshot,
                                 const MetricJsonOptions& opts) const;
 
+  // Returns a pointer to the underlying histogram. The implementation of HdrHistogram
+  // is thread safe.
+  const HdrHistogram* histogram() const { return histogram_.get(); }
+
   uint64_t CountInBucketForValueForTests(uint64_t value) const;
   uint64_t MinValueForTests() const;
   uint64_t MaxValueForTests() const;