Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/15 14:41:00 UTC

[impala] branch master updated: IMPALA-8849: fix IllegalStateException with VARCHAR

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new c3a67b6  IMPALA-8849: fix IllegalStateException with VARCHAR
c3a67b6 is described below

commit c3a67b67faaebf18735fa36d35c80d8c11043f7f
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Fri Aug 9 17:56:00 2019 -0700

    IMPALA-8849: fix IllegalStateException with VARCHAR
    
    The bug is that the serialized size was not populated
    for VARCHAR in a case where it should have been: a
    type-check condition was simply not extended when
    VARCHAR was added.
    
    Other code assumed that the serialized size was
    populated when the other size field was populated,
    which is a reasonable invariant. I documented the
    invariant in the class and added validation that
    the invariant held.
    
    Defining and checking invariants surfaced various other
    minor issues where the sizes were set incorrectly for
    fixed-length types or not set at all for
    variable-length types:
    * CHAR was not consistently treated as a fixed-length type.
    * avgSerializedSize_ was not always updated with avgSize_.
    
    Testing:
    Added a regression test for this specific case. Adding
    the assertions also surfaced related bugs in other
    cases.
    
    Change-Id: Ie45e386cb09e31f4b7cdc82b7734dbecb4464534
    Reviewed-on: http://gerrit.cloudera.org:8080/14062
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
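
For context, here is a minimal sketch of the pre-fix logic in
ColumnStats.update(Type, TColumnStats), reconstructed from the removed
lines in the ColumnStats.java hunk below (a simplification, not the
verbatim Impala code):

    // Pre-fix shape of ColumnStats.update(Type, TColumnStats).
    public void update(Type colType, TColumnStats stats) {
      initColStats(colType);  // leaves avgSize_/avgSerializedSize_ at the
                              // -1 "unknown" sentinel for var-length types
      avgSize_ = Double.valueOf(stats.getAvg_size()).floatValue();
      if (colType.getPrimitiveType() == PrimitiveType.STRING ||
          colType.getPrimitiveType() == PrimitiveType.BINARY) {
        // VARCHAR never matches this condition, so avgSize_ was populated
        // while avgSerializedSize_ stayed -1, violating the invariant that
        // the two fields are valid together and producing a negative
        // serialized row size downstream.
        avgSerializedSize_ = colType.getSlotSize() + avgSize_;
      }
      maxSize_ = stats.getMax_size();
      numDistinctValues_ = stats.getNum_distinct_values();
      numNulls_ = stats.getNum_nulls();
    }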
---
 .../impala/analysis/AlterTableSetColumnStats.java  |  4 +-
 .../org/apache/impala/catalog/ColumnStats.java     | 81 +++++++++++++++++++---
 .../org/apache/impala/planner/PlannerTest.java     | 20 +++---
 testdata/bin/compute-table-stats.sh                |  2 +-
 .../queries/PlannerTest/card-scan.test             | 70 +++++++++++--------
 .../queries/PlannerTest/empty.test                 | 48 ++++++-------
 .../QueryTest/compute-stats-incremental.test       |  6 +-
 .../queries/QueryTest/compute-stats.test           |  2 +-
 8 files changed, 154 insertions(+), 79 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java
index b68916b..6a3fcee 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetColumnStats.java
@@ -123,7 +123,7 @@ public class AlterTableSetColumnStats extends AlterTableStmt {
             "Expected a positive integer or -1 for unknown.",
             statsValue, statsKey));
       }
-      stats.update(statsKey, statsVal);
+      stats.update(col.getType(), statsKey, statsVal);
     } else if (statsKey == ColumnStats.StatsKey.AVG_SIZE) {
       Float statsVal = null;
       try {
@@ -137,7 +137,7 @@ public class AlterTableSetColumnStats extends AlterTableStmt {
             "Expected a positive floating-point number or -1 for unknown.",
             statsValue, statsKey));
       }
-      stats.update(statsKey, statsVal);
+      stats.update(col.getType(), statsKey, statsVal);
     } else {
       Preconditions.checkState(false, "Unhandled StatsKey value: " + statsKey);
     }
diff --git a/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java b/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java
index 59f079f..46e5f68 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java
@@ -73,9 +73,12 @@ public class ColumnStats {
     public String toString() { return name_; }
   }
 
-  // in bytes: excludes serialization overhead
+  // in bytes: excludes serialization overhead.
+  // -1 if unknown. Always has a valid value for fixed-length types.
   private double avgSize_;
   // in bytes; includes serialization overhead.
+  // -1 if unknown. Always has a valid value for fixed-length types.
+  // avgSerializedSize_ is valid iff avgSize_ is valid.
   private double avgSerializedSize_;
   private long maxSize_;  // in bytes
   private long numDistinctValues_;
@@ -83,6 +86,7 @@ public class ColumnStats {
 
   public ColumnStats(Type colType) {
     initColStats(colType);
+    validate(colType);
   }
 
   /**
@@ -94,6 +98,7 @@ public class ColumnStats {
     maxSize_ = other.maxSize_;
     numDistinctValues_ = other.numDistinctValues_;
     numNulls_ = other.numNulls_;
+    validate(null);
   }
 
   /**
@@ -121,16 +126,20 @@ public class ColumnStats {
   public static ColumnStats fromExpr(Expr expr) {
     Preconditions.checkNotNull(expr);
     Preconditions.checkState(expr.getType().isValid());
-    ColumnStats stats = new ColumnStats(expr.getType());
+    Type colType = expr.getType();
+    ColumnStats stats = new ColumnStats(colType);
     stats.setNumDistinctValues(expr.getNumDistinctValues());
     SlotRef slotRef = expr.unwrapSlotRef(false);
     if (slotRef == null) return stats;
     ColumnStats slotStats = slotRef.getDesc().getStats();
     if (slotStats == null) return stats;
     stats.numNulls_ = slotStats.getNumNulls();
-    stats.avgSerializedSize_ = slotStats.getAvgSerializedSize();
-    stats.avgSize_ = slotStats.getAvgSize();
-    stats.maxSize_ = slotStats.getMaxSize();
+    if (!colType.isFixedLengthType()) {
+      stats.avgSerializedSize_ = slotStats.getAvgSerializedSize();
+      stats.avgSize_ = slotStats.getAvgSize();
+      stats.maxSize_ = slotStats.getMaxSize();
+    }
+    stats.validate(colType);
     return stats;
   }
 
@@ -152,6 +161,7 @@ public class ColumnStats {
     } else {
       numNulls_ += other.numNulls_;
     }
+    validate(null);
     return this;
   }
 
@@ -165,6 +175,7 @@ public class ColumnStats {
   public long getMaxSize() { return maxSize_; }
   public boolean hasNulls() { return numNulls_ > 0; }
   public long getNumNulls() { return numNulls_; }
+  // True iff getAvgSize() and getAvgSerializedSize() will return valid values.
   public boolean hasAvgSize() { return avgSize_ >= 0; }
   public boolean hasNumDistinctValues() { return numDistinctValues_ >= 0; }
   public boolean hasStats() { return numNulls_ != -1 || numDistinctValues_ != -1; }
@@ -226,6 +237,14 @@ public class ColumnStats {
         }
         break;
       case CHAR:
+        // Ignore CHAR length stats, since it is fixed length internally.
+        isCompatible = statsData.isSetStringStats();
+        if (isCompatible) {
+          StringColumnStatsData stringStats = statsData.getStringStats();
+          numDistinctValues_ = stringStats.getNumDVs();
+          numNulls_ = stringStats.getNumNulls();
+        }
+        break;
       case VARCHAR:
       case STRING:
         isCompatible = statsData.isSetStringStats();
@@ -235,7 +254,11 @@ public class ColumnStats {
           numNulls_ = stringStats.getNumNulls();
           maxSize_ = stringStats.getMaxColLen();
           avgSize_ = Double.valueOf(stringStats.getAvgColLen()).floatValue();
-          avgSerializedSize_ = avgSize_ + PrimitiveType.STRING.getSlotSize();
+          if (avgSize_ >= 0) {
+            avgSerializedSize_ = avgSize_ + PrimitiveType.STRING.getSlotSize();
+          } else {
+            avgSerializedSize_ = -1;
+          }
         }
         break;
       case BINARY:
@@ -261,6 +284,7 @@ public class ColumnStats {
             "Unexpected column type: " + colType.toString());
         break;
     }
+    validate(colType);
     return isCompatible;
   }
 
@@ -340,13 +364,17 @@ public class ColumnStats {
    * Requires that the given value is of a type appropriate for the
    * member being set. Throws if that is not the case.
    */
-  public void update(StatsKey key, Number value) {
+  public void update(Type colType, StatsKey key, Number value) {
     Preconditions.checkNotNull(key);
     Preconditions.checkNotNull(value);
     if (key == StatsKey.AVG_SIZE) {
       Preconditions.checkArgument(value instanceof Float);
+      Float floatValue = (Float) value;
+      Preconditions.checkArgument(floatValue >= 0 || floatValue == -1, floatValue);
     } else {
       Preconditions.checkArgument(value instanceof Long);
+      Long longValue = (Long) value;
+      Preconditions.checkArgument(longValue >= 0 || longValue == -1, longValue);
     }
     switch (key) {
       case NUM_DISTINCT_VALUES: {
@@ -358,15 +386,24 @@ public class ColumnStats {
         break;
       }
       case AVG_SIZE: {
+        Preconditions.checkArgument(!colType.isFixedLengthType(), colType);
         avgSize_ = (Float) value;
+        // Ensure avgSerializedSize_ stays in sync with avgSize_.
+        if (avgSize_ >= 0) {
+          avgSerializedSize_ = colType.getSlotSize() + avgSize_;
+        } else {
+          avgSerializedSize_ = -1;
+        }
         break;
       }
       case MAX_SIZE: {
+        Preconditions.checkArgument(!colType.isFixedLengthType(), colType);
         maxSize_ = (Long) value;
         break;
       }
       default: Preconditions.checkState(false);
     }
+    validate(colType);
   }
 
   /**
@@ -380,14 +417,16 @@ public class ColumnStats {
 
   public void update(Type colType, TColumnStats stats) {
     initColStats(colType);
-    avgSize_ = Double.valueOf(stats.getAvg_size()).floatValue();
-    if (colType.getPrimitiveType() == PrimitiveType.STRING ||
-        colType.getPrimitiveType() == PrimitiveType.BINARY) {
+    if (!colType.isFixedLengthType() && stats.getAvg_size() >= 0) {
+      // Update size estimates based on average size. Fixed length types already include
+      // size estimates.
+      avgSize_ = Double.valueOf(stats.getAvg_size()).floatValue();
       avgSerializedSize_ = colType.getSlotSize() + avgSize_;
     }
     maxSize_ = stats.getMax_size();
     numDistinctValues_ = stats.getNum_distinct_values();
     numNulls_ = stats.getNum_nulls();
+    validate(colType);
   }
 
   public TColumnStats toThrift() {
@@ -399,9 +438,31 @@ public class ColumnStats {
     return colStats;
   }
 
+  /**
+   * Check that the stats obey expected invariants.
+   * 'colType' is optional, but should be passed in if it is available in the caller.
+   */
+  public void validate(Type colType) {
+    // avgSize_ and avgSerializedSize_ must be set together.
+    Preconditions.checkState(avgSize_ >= 0 == avgSerializedSize_ >= 0, this);
+
+    // Values must be either valid or -1.
+    Preconditions.checkState(avgSize_ == -1 || avgSize_ >= 0, this);
+    Preconditions.checkState(avgSerializedSize_ == -1 || avgSerializedSize_ >= 0, this);
+    Preconditions.checkState(maxSize_ == -1 || maxSize_ >= 0, this);
+    Preconditions.checkState(numDistinctValues_ == -1 || numDistinctValues_ >= 0, this);
+    Preconditions.checkState(numNulls_ == -1 || numNulls_ >= 0, this);
+    if (colType != null && colType.isFixedLengthType()) {
+      Preconditions.checkState(avgSize_ == colType.getSlotSize(), this);
+      Preconditions.checkState(avgSerializedSize_ == colType.getSlotSize(), this);
+      Preconditions.checkState(maxSize_ == colType.getSlotSize(), this);
+    }
+  }
+
   @Override
   public String toString() {
     return Objects.toStringHelper(this.getClass())
+        .add("avgSize_", avgSize_)
         .add("avgSerializedSize_", avgSerializedSize_)
         .add("maxSize_", maxSize_)
         .add("numDistinct_", numDistinctValues_)
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 76016c1..5cbcc37 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -54,7 +54,8 @@ public class PlannerTest extends PlannerTestBase {
    */
   @Test
   public void testScanCardinality() {
-    runPlannerTestFile("card-scan");
+    runPlannerTestFile("card-scan",
+        ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY));
   }
 
   /**
@@ -62,7 +63,8 @@ public class PlannerTest extends PlannerTestBase {
    */
   @Test
   public void testInnerJoinCardinality() {
-    runPlannerTestFile("card-inner-join");
+    runPlannerTestFile("card-inner-join",
+        ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY));
   }
 
   /**
@@ -70,7 +72,8 @@ public class PlannerTest extends PlannerTestBase {
    */
   @Test
   public void testOuterJoinCardinality() {
-    runPlannerTestFile("card-outer-join");
+    runPlannerTestFile("card-outer-join",
+        ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY));
   }
 
   /**
@@ -78,7 +81,8 @@ public class PlannerTest extends PlannerTestBase {
    */
   @Test
   public void testMultiJoinCardinality() {
-    runPlannerTestFile("card-multi-join");
+    runPlannerTestFile("card-multi-join",
+        ImmutableSet.of(PlannerTestOption.VALIDATE_CARDINALITY));
   }
 
   @Test
@@ -870,16 +874,16 @@ public class PlannerTest extends PlannerTestBase {
     // Single key string column with max length stat.
     HBaseColumn stringColwithSmallMaxSize = new HBaseColumn("",
         FeHBaseTable.Util.ROW_KEY_COLUMN_FAMILY, "", false, Type.STRING, "", 1);
-    stringColwithSmallMaxSize.getStats().update(ColumnStats.StatsKey.MAX_SIZE,
-        Long.valueOf(50));
+    stringColwithSmallMaxSize.getStats().update(
+        Type.STRING, ColumnStats.StatsKey.MAX_SIZE, Long.valueOf(50));
     assertEquals(HBaseScanNode.memoryEstimateForFetchingColumns(Lists
         .newArrayList(stringColwithSmallMaxSize)), 128);
 
     // Case that triggers the upper bound if some columns have stats are missing.
     HBaseColumn stringColwithLargeMaxSize = new HBaseColumn("",
         FeHBaseTable.Util.ROW_KEY_COLUMN_FAMILY, "", false, Type.STRING, "", 1);
-    stringColwithLargeMaxSize.getStats().update(ColumnStats.StatsKey.MAX_SIZE,
-        Long.valueOf(128 * 1024 * 1024));
+    stringColwithLargeMaxSize.getStats().update(
+        Type.STRING, ColumnStats.StatsKey.MAX_SIZE, Long.valueOf(128 * 1024 * 1024));
     assertEquals(HBaseScanNode.memoryEstimateForFetchingColumns(Lists.newArrayList(
         stringColwithLargeMaxSize, stringColWithoutStats)), 128 * 1024 * 1024);
 
diff --git a/testdata/bin/compute-table-stats.sh b/testdata/bin/compute-table-stats.sh
index 2d6733a..d7e8cf8 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -34,7 +34,7 @@ COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad
 ${COMPUTE_STATS_SCRIPT} --db_names=functional\
     --table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls,
     alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable,nullrows,
-    date_tbl"
+    date_tbl,chars_medium"
 
 # We cannot load HBase on s3 and isilon yet.
 if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
index fb21e84..0987b27 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
@@ -8,7 +8,7 @@ select * from tpch.customer
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    row-size=218B cardinality=150.00K
 ====
 # Predicate on a single value: card = |T|/ndv
@@ -20,7 +20,7 @@ where c.c_custkey = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10
    row-size=218B cardinality=1
 ====
@@ -33,7 +33,7 @@ where c.c_nationkey = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_nationkey = 10
    row-size=218B cardinality=6.00K
 ====
@@ -45,7 +45,7 @@ where c.c_custkey = 10 OR c.c_custkey = 20
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 20)
    row-size=218B cardinality=2
 ====
@@ -59,7 +59,7 @@ where c.c_custkey = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10 OR c.c_nationkey = 10
    row-size=218B cardinality=6.00K
 ====
@@ -77,7 +77,7 @@ where t.id = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypestiny t]
-   partitions=4/4 files=4 size=460B
+   HDFS partitions=4/4 files=4 size=460B
    predicates: t.id = 10 OR t.bool_col = TRUE
    row-size=89B cardinality=5
 ====
@@ -89,7 +89,7 @@ where c.c_custkey in (10, 20, 30)
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 20, 30)
    row-size=218B cardinality=3
 ====
@@ -102,7 +102,7 @@ where c.c_custkey in (10, 20, 30, 30, 10, 20)
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 20, 30, 30, 10, 20)
    row-size=218B cardinality=6
 ====
@@ -114,7 +114,7 @@ where c.c_custkey = 10 OR c.c_custkey = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10
    row-size=218B cardinality=1
 ====
@@ -128,7 +128,7 @@ where c.c_custkey = 10 OR 10 = c.c_custkey
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey IN (10, 10)
    row-size=218B cardinality=2
 ====
@@ -149,7 +149,7 @@ where c.c_custkey = 10 AND c.c_custkey = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey = 10
    row-size=218B cardinality=1
 ====
@@ -165,7 +165,7 @@ where id != 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypestiny]
-   partitions=4/4 files=4 size=460B
+   HDFS partitions=4/4 files=4 size=460B
    predicates: id != 10
    row-size=89B cardinality=1
 ====
@@ -179,7 +179,7 @@ where c.c_custkey < 1234
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234
    row-size=218B cardinality=15.00K
 ====
@@ -194,7 +194,7 @@ where c.c_custkey < 1234
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234
    row-size=218B cardinality=15.00K
 ====
@@ -209,7 +209,7 @@ where c.c_custkey < 1234
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234
    row-size=218B cardinality=15.00K
 ====
@@ -224,7 +224,7 @@ where c.c_custkey < 1234
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234, c.c_custkey < 2345
    row-size=218B cardinality=15.00K
 ====
@@ -240,7 +240,7 @@ where c.c_custkey < 1234
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey < 1234, c.c_nationkey < 100
    row-size=218B cardinality=15.00K
 ====
@@ -257,7 +257,7 @@ where c.c_custkey between 1234 and 2345
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
    row-size=218B cardinality=15.00K
 ====
@@ -278,7 +278,7 @@ where c.c_custkey >= 1234 and c.c_custkey <= 2345
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
    row-size=218B cardinality=15.00K
 ====
@@ -292,7 +292,7 @@ where c.c_custkey between 1234 and 2345
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
    row-size=218B cardinality=15.00K
 ====
@@ -309,7 +309,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partition predicates: a.`day` >= 6
-   partitions=5/11 files=5 size=372.38KB
+   HDFS partitions=5/11 files=5 size=372.38KB
    row-size=95B cardinality=5.00K
 ====
 # Partitioned table, one partition matches
@@ -321,7 +321,7 @@ PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg a]
    partition predicates: a.`day` = 6
-   partitions=1/11 files=1 size=74.48KB
+   HDFS partitions=1/11 files=1 size=74.48KB
    row-size=95B cardinality=1.00K
 ====
 # Partitioned table, no partitions match
@@ -350,7 +350,7 @@ where c.c_mktsegment is null
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_mktsegment IS NULL
    row-size=218B cardinality=15.00K
 ====
@@ -364,7 +364,7 @@ where c.c_mktsegment is not null
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: c.c_mktsegment IS NOT NULL
    row-size=218B cardinality=15.00K
 ====
@@ -376,7 +376,7 @@ where tinyint_col is null
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg]
-   partitions=11/11 files=11 size=814.73KB
+   HDFS partitions=11/11 files=11 size=814.73KB
    predicates: tinyint_col IS NULL
    row-size=95B cardinality=2.00K
 ====
@@ -388,7 +388,7 @@ where tinyint_col is not null
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesagg]
-   partitions=11/11 files=11 size=814.73KB
+   HDFS partitions=11/11 files=11 size=814.73KB
    predicates: tinyint_col IS NOT NULL
    row-size=95B cardinality=9.00K
 ====
@@ -400,7 +400,7 @@ where concat(c.c_mktsegment, c_comment) is null
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: concat(c.c_mktsegment, c_comment) IS NULL
    row-size=218B cardinality=15.00K
 ====
@@ -414,7 +414,7 @@ where concat(c.c_mktsegment, c_comment) is not null
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [tpch.customer c]
-   partitions=1/1 files=1 size=23.08MB
+   HDFS partitions=1/1 files=1 size=23.08MB
    predicates: concat(c.c_mktsegment, c_comment) IS NOT NULL
    row-size=218B cardinality=15.00K
 ====
@@ -425,7 +425,7 @@ from functional.alltypesnopart
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesnopart]
-   partitions=1/1 files=0 size=0B
+   HDFS partitions=1/1 files=0 size=0B
    row-size=72B cardinality=0
 ====
 # Filter on the no-stats table
@@ -436,7 +436,17 @@ where int_col = 10
 PLAN-ROOT SINK
 |
 00:SCAN HDFS [functional.alltypesnopart]
-   partitions=1/1 files=0 size=0B
+   HDFS partitions=1/1 files=0 size=0B
    predicates: int_col = 10
    row-size=72B cardinality=0
 ====
+# VARCHAR column with stats
+# Regression test for IMPALA-8849 - previously produced a negative row size.
+select varchar_col from functional.chars_medium
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.chars_medium]
+   HDFS partitions=1/1 files=1 size=320.68KB
+   row-size=15B cardinality=11.00K
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
index 964b7f9..cfb9843 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test
@@ -100,7 +100,7 @@ PLAN-ROOT SINK
 |--01:EMPTYSET
 |
 00:SCAN HDFS [functional.alltypessmall f]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> f.id
    row-size=4B cardinality=100
 ====
@@ -118,11 +118,11 @@ PLAN-ROOT SINK
 |  row-size=89B cardinality=108
 |
 |--02:SCAN HDFS [functional.alltypestiny]
-|     partitions=4/4 files=4 size=460B
+|     HDFS partitions=4/4 files=4 size=460B
 |     row-size=89B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    row-size=89B cardinality=100
 ====
 # Constant conjunct turns union into an empty-set node.
@@ -145,7 +145,7 @@ PLAN-ROOT SINK
 |--01:EMPTYSET
 |
 00:SCAN HDFS [functional.alltypes a]
-   partitions=24/24 files=24 size=478.45KB
+   HDFS partitions=24/24 files=24 size=478.45KB
    row-size=89B cardinality=7.30K
 ====
 # Constant conjunct in the ON-clause of an outer join is
@@ -163,11 +163,11 @@ PLAN-ROOT SINK
 |  row-size=178B cardinality=100
 |
 |--01:SCAN HDFS [functional.alltypestiny b]
-|     partitions=4/4 files=4 size=460B
+|     HDFS partitions=4/4 files=4 size=460B
 |     row-size=89B cardinality=8
 |
 00:SCAN HDFS [functional.alltypessmall a]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    row-size=89B cardinality=100
 ====
 # Constant conjunct in the ON-clause of an outer join is
@@ -186,11 +186,11 @@ PLAN-ROOT SINK
 |  row-size=178B cardinality=9
 |
 |--01:SCAN HDFS [functional.alltypestiny b]
-|     partitions=4/4 files=4 size=460B
+|     HDFS partitions=4/4 files=4 size=460B
 |     row-size=89B cardinality=8
 |
 00:SCAN HDFS [functional.alltypessmall a]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> a.id
    row-size=89B cardinality=100
 ====
@@ -209,11 +209,11 @@ PLAN-ROOT SINK
 |  row-size=178B cardinality=108
 |
 |--01:SCAN HDFS [functional.alltypestiny b]
-|     partitions=4/4 files=4 size=460B
+|     HDFS partitions=4/4 files=4 size=460B
 |     row-size=89B cardinality=8
 |
 00:SCAN HDFS [functional.alltypessmall a]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    row-size=89B cardinality=100
 ====
 # Limit 0 turns query block into an empty-set node.
@@ -257,7 +257,7 @@ PLAN-ROOT SINK
 |--01:EMPTYSET
 |
 00:SCAN HDFS [functional.alltypessmall f]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    runtime filters: RF000 -> f.id
    row-size=4B cardinality=100
 ====
@@ -275,11 +275,11 @@ PLAN-ROOT SINK
 |  row-size=89B cardinality=108
 |
 |--02:SCAN HDFS [functional.alltypestiny]
-|     partitions=4/4 files=4 size=460B
+|     HDFS partitions=4/4 files=4 size=460B
 |     row-size=89B cardinality=8
 |
 01:SCAN HDFS [functional.alltypessmall]
-   partitions=4/4 files=4 size=6.32KB
+   HDFS partitions=4/4 files=4 size=6.32KB
    row-size=89B cardinality=100
 ====
 # Limit 0 causes empty-set union.
@@ -308,7 +308,7 @@ PLAN-ROOT SINK
 |  row-size=8B cardinality=11.00K
 |
 |--03:SCAN HDFS [functional.alltypesagg]
-|     partitions=11/11 files=11 size=814.73KB
+|     HDFS partitions=11/11 files=11 size=814.73KB
 |     row-size=4B cardinality=11.00K
 |
 02:AGGREGATE [FINALIZE]
@@ -428,14 +428,14 @@ PLAN-ROOT SINK
 |
 02:UNION
 |  pass-through-operands: all
-|  row-size=2B cardinality=0
+|  row-size=4B cardinality=0
 |
 01:AGGREGATE [FINALIZE]
 |  group by: lead(-496, 81, NULL) OVER(...)
-|  row-size=2B cardinality=0
+|  row-size=4B cardinality=0
 |
 00:UNION
-   row-size=2B cardinality=0
+   row-size=4B cardinality=0
 ====
 # IMPALA-2088: Test empty union operands with analytic functions.
 select lead(-496, 81) over (order by t1.double_col desc, t1.id asc)
@@ -456,18 +456,18 @@ PLAN-ROOT SINK
 02:UNION
 |  constant-operands=1
 |  pass-through-operands: 01
-|  row-size=2B cardinality=9
+|  row-size=16B cardinality=9
 |
 |--03:SCAN HDFS [functional.alltypestiny]
-|     partitions=4/4 files=4 size=460B
+|     HDFS partitions=4/4 files=4 size=460B
 |     row-size=8B cardinality=8
 |
 01:AGGREGATE [FINALIZE]
 |  group by: lead(-496, 81, NULL) OVER(...)
-|  row-size=2B cardinality=0
+|  row-size=16B cardinality=0
 |
 00:UNION
-   row-size=2B cardinality=0
+   row-size=16B cardinality=0
 ====
 # IMPALA-2216: Make sure the final output exprs are substituted, even
 # if the resulting plan is an EmptySetNode.
@@ -556,7 +556,7 @@ PLAN-ROOT SINK
 |  03:EMPTYSET
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
-   partitions=1/1 files=4 size=288.99MB
+   HDFS partitions=1/1 files=4 size=288.98MB
    predicates: c_custkey < 10
    row-size=56B cardinality=15.00K
 ====
@@ -598,7 +598,7 @@ PLAN-ROOT SINK
 |     row-size=8B cardinality=10
 |
 00:SCAN HDFS [tpch_nested_parquet.customer c]
-   partitions=1/1 files=4 size=288.99MB
+   HDFS partitions=1/1 files=4 size=288.98MB
    predicates: c_custkey = 1
    row-size=44B cardinality=1
 ====
@@ -632,6 +632,6 @@ PLAN-ROOT SINK
 |--01:EMPTYSET
 |
 00:SCAN HDFS [functional.alltypes x]
-   partitions=24/24 files=24 size=478.45KB
+   HDFS partitions=24/24 files=24 size=478.45KB
    row-size=0B cardinality=7.30K
 ====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
index e76170a..ce80222 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
@@ -549,7 +549,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 'id','INT',2915,0,4,4
 'ch1','CHAR(1)',1,0,1,1
 'ch2','CHAR(8)',10,0,8,8
-'ch3','CHAR(20)',10,0,8,8
+'ch3','CHAR(20)',10,0,20,20
 'ts','TIMESTAMP',2871,0,16,16
 'vc1','VARCHAR(1)',1,0,1,1
 'vc2','VARCHAR(8)',10,0,8,8
@@ -578,8 +578,8 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
 'id','INT',2915,1,4,4
 'ch1','CHAR(1)',2,0,1,1
-'ch2','CHAR(8)',11,0,8,7.99766731262207
-'ch3','CHAR(20)',11,0,8,7.99766731262207
+'ch2','CHAR(8)',11,0,8,8
+'ch3','CHAR(20)',11,0,20,20
 'ts','TIMESTAMP',2871,1,16,16
 'vc1','VARCHAR(1)',2,0,1,1
 'vc2','VARCHAR(8)',11,0,8,7.99766731262207
diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
index 01e5aa8..38d1024 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test
@@ -794,7 +794,7 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 'id','INT',2915,0,4,4
 'ch1','CHAR(1)',1,0,1,1
 'ch2','CHAR(8)',10,0,8,8
-'ch3','CHAR(20)',10,0,8,8
+'ch3','CHAR(20)',10,0,20,20
 'ts','TIMESTAMP',2871,0,16,16
 'vc1','VARCHAR(1)',1,0,1,1
 'vc2','VARCHAR(8)',10,0,8,8