You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by db...@apache.org on 2022/12/15 15:05:09 UTC
[impala] branch master updated: IMPALA-11717: Use rapidjson for printing collections

This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 25b5058ef IMPALA-11717: Use rapidjson for printing collections
25b5058ef is described below

commit 25b5058ef5006c9541f4825860e7561812dafa60
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Thu Nov 10 01:35:32 2022 +0100

    IMPALA-11717: Use rapidjson for printing collections
    
    We have been using rapidjson to print structs but didn't use it to print
    collections (arrays and maps).
    
    This change introduces the usage of rapidjson to print collections for
    both the HS2 and the Beeswax protocol.
    
    The old code handling the printing of collections in raw-value.{h,cc} is
    removed.
    
    Testing:
     - Ran existing EE tests
     - Added EE tests with non-string and NULL map keys in
       nested-map-in-select-list.test and map_null_keys.test.
    
    Change-Id: I08a2d596a498fbbaf1419b18284846b992f49165
    Reviewed-on: http://gerrit.cloudera.org:8080/19309
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Daniel Becker <da...@cloudera.com>
---
 be/src/runtime/complex-value-writer.h              |  63 +++++++
 be/src/runtime/complex-value-writer.inline.h       | 186 +++++++++++++++++++++
 be/src/runtime/raw-value.cc                        |  85 +---------
 be/src/runtime/raw-value.h                         |  12 +-
 be/src/service/hs2-util.cc                         |  70 +++-----
 be/src/service/query-result-set.cc                 |  25 ++-
 be/src/service/query-result-set.h                  |   7 +-
 .../functional/functional_schema_template.sql      |  74 +++++++-
 .../datasets/functional/schema_constraints.csv     |   4 +
 .../queries/QueryTest/map_null_keys.test           |  24 +++
 .../QueryTest/nested-map-in-select-list.test       |  22 +++
 tests/query_test/test_nested_types.py              |   6 +
 12 files changed, 435 insertions(+), 143 deletions(-)

diff --git a/be/src/runtime/complex-value-writer.h b/be/src/runtime/complex-value-writer.h
new file mode 100644
index 000000000..0de297429
--- /dev/null
+++ b/be/src/runtime/complex-value-writer.h
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <type_traits>
+
+#include <rapidjson/ostreamwrapper.h>
+#include <rapidjson/rapidjson.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+
+#include "runtime/row-batch.h"
+#include "runtime/tuple.h"
+#include "udf/udf-internal.h"
+
+namespace impala {
+
+// Class with static methods that write complex types (collections and structs) in JSON
+// format.
+template <class JsonStream>
+class ComplexValueWriter {
+ public:
+  // Gets a non-null CollectionValue and writes it in JSON format using 'writer'.
+  // 'collection_type' should be either TYPE_ARRAY or TYPE_MAP.
+  static void CollectionValueToJSON(const CollectionValue& collection_value,
+      PrimitiveType collection_type, const TupleDescriptor* item_tuple_desc,
+      rapidjson::Writer<JsonStream>* writer);
+
+  // Gets a non-null StructVal and writes it in JSON format using 'writer'. Uses
+  // 'column_type' to figure out field names and types. This function can call itself
+  // recursively in case of nested structs.
+  static void StructValToJSON(const impala_udf::StructVal& struct_val,
+      const ColumnType& column_type, rapidjson::Writer<JsonStream>* writer);
+
+ private:
+  static void PrimitiveValueToJSON(void* value, const ColumnType& type, bool map_key,
+      rapidjson::Writer<JsonStream>* writer);
+  static void WriteNull(rapidjson::Writer<JsonStream>* writer);
+  static void CollectionElementToJSON(Tuple* item_tuple, const SlotDescriptor& slot_desc,
+      bool map_key, rapidjson::Writer<JsonStream>* writer);
+  static void ArrayValueToJSON(const CollectionValue& array_value,
+    const TupleDescriptor* item_tuple_desc, rapidjson::Writer<JsonStream>* writer);
+  static void MapValueToJSON(const CollectionValue& map_value,
+      const TupleDescriptor* item_tuple_desc, rapidjson::Writer<JsonStream>* writer);
+
+};
+
+} // namespace impala
diff --git a/be/src/runtime/complex-value-writer.inline.h b/be/src/runtime/complex-value-writer.inline.h
new file mode 100644
index 000000000..52625bf27
--- /dev/null
+++ b/be/src/runtime/complex-value-writer.inline.h
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/complex-value-writer.h"
+
+#include <string>
+
+#include "runtime/raw-value.inline.h"
+
+namespace impala {
+
+using impala_udf::StructVal;
+
+namespace {
+
+bool IsPrimitiveTypePrintedAsString(const ColumnType& type) {
+  return type.IsStringType() || type.IsDateType() || type.IsTimestampType();
+}
+
+} // anonymous namespace
+
+// Writes a primitive value using 'writer'. If 'map_key' is true, treats the value as a
+// map key. Maps are printed as JSON objects, but in Impala maps can have non-string keys
+// while JSON objects can't. We circumvent this rule by writing raw values with the
+// 'rapidjson::kStringType' type when printing map keys. These non-string key values will
+// not have quotes around them in the printed text.
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::PrimitiveValueToJSON(void* value,
+    const ColumnType& type, bool map_key, rapidjson::Writer<JsonStream>* writer) {
+  if (type.IsBooleanType() && !map_key) {
+    writer->Bool( *(reinterpret_cast<bool*>(value)) );
+    return;
+  }
+
+  string tmp;
+  // Passing -1 as 'scale' means we do not modify the precision setting of the stream
+  // (used when printing floating point values).
+  constexpr int scale = -1;
+  RawValue::PrintValue(value, type, scale, &tmp);
+  if (IsPrimitiveTypePrintedAsString(type)) {
+    writer->String(tmp.c_str());
+  } else {
+    writer->RawValue(tmp.c_str(), tmp.size(),
+        map_key ? rapidjson::kStringType : rapidjson::kNumberType);
+  }
+}
+
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::WriteNull(rapidjson::Writer<JsonStream>* writer) {
+  // We don't use writer->Null() because that does not work for map keys. Maps are
+  // modelled as JSON objects but JSON objects can only have string keys, not nulls. We
+  // circumvent this limitation by writing a raw value.
+  constexpr char const* null_str  = RawValue::NullLiteral(/*top_level*/ false);
+  const int null_str_len = std::char_traits<char>::length(null_str);
+  writer->RawValue(null_str, null_str_len, rapidjson::kStringType);
+}
+
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::CollectionElementToJSON(Tuple* item_tuple,
+    const SlotDescriptor& slot_desc, bool map_key,
+    rapidjson::Writer<JsonStream>* writer) {
+  void* element = item_tuple->GetSlot(slot_desc.tuple_offset());
+  DCHECK(element != nullptr);
+  const ColumnType& element_type = slot_desc.type();
+  bool element_is_null = item_tuple->IsNull(slot_desc.null_indicator_offset());
+  if (element_is_null) {
+    WriteNull(writer);
+  } else if (element_type.IsStructType()) {
+    DCHECK(false) << "Structs in collections are not supported yet.";
+  } else if (element_type.IsCollectionType()) {
+    const CollectionValue* nested_collection_val =
+      reinterpret_cast<CollectionValue*>(element);
+    const TupleDescriptor* child_item_tuple_desc =
+      slot_desc.children_tuple_descriptor();
+    DCHECK(child_item_tuple_desc != nullptr);
+    ComplexValueWriter::CollectionValueToJSON(*nested_collection_val, element_type.type,
+        child_item_tuple_desc, writer);
+  } else {
+    PrimitiveValueToJSON(element, element_type, map_key, writer);
+  }
+}
+
+// Gets an ArrayValue and writes it in JSON format using 'writer'.
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::ArrayValueToJSON(const CollectionValue& array_value,
+    const TupleDescriptor* item_tuple_desc,
+    rapidjson::Writer<JsonStream>* writer) {
+  DCHECK(item_tuple_desc != nullptr);
+
+  const int item_byte_size = item_tuple_desc->byte_size();
+  const vector<SlotDescriptor*>& slot_descs = item_tuple_desc->slots();
+
+  DCHECK(slot_descs.size() == 1);
+  DCHECK(slot_descs[0] != nullptr);
+
+  writer->StartArray();
+
+  for (int i = 0; i < array_value.num_tuples; ++i) {
+    Tuple* item_tuple = reinterpret_cast<Tuple*>(
+        array_value.ptr + i * item_byte_size);
+    // Print the value.
+    const SlotDescriptor& value_slot_desc = *slot_descs[0];
+    CollectionElementToJSON(item_tuple, value_slot_desc, false, writer);
+  }
+
+  writer->EndArray();
+}
+
+// Gets a MapValue and writes it in JSON format using 'writer'.
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::MapValueToJSON(const CollectionValue& map_value,
+    const TupleDescriptor* item_tuple_desc, rapidjson::Writer<JsonStream>* writer) {
+  DCHECK(item_tuple_desc != nullptr);
+
+  const int item_byte_size = item_tuple_desc->byte_size();
+  const vector<SlotDescriptor*>& slot_descs = item_tuple_desc->slots();
+
+  DCHECK(slot_descs.size() == 2);
+  DCHECK(slot_descs[0] != nullptr);
+  DCHECK(slot_descs[1] != nullptr);
+
+  writer->StartObject();
+
+  for (int i = 0; i < map_value.num_tuples; ++i) {
+    Tuple* item_tuple = reinterpret_cast<Tuple*>(
+        map_value.ptr + i * item_byte_size);
+    // Print key.
+    const SlotDescriptor& key_slot_desc = *slot_descs[0];
+    CollectionElementToJSON(item_tuple, key_slot_desc, true, writer);
+
+    // Print the value.
+    const SlotDescriptor& value_slot_desc = *slot_descs[1];
+    CollectionElementToJSON(item_tuple, value_slot_desc, false, writer);
+  }
+
+  writer->EndObject();
+}
+
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::CollectionValueToJSON(
+    const CollectionValue& collection_value, PrimitiveType collection_type,
+    const TupleDescriptor* item_tuple_desc, rapidjson::Writer<JsonStream>* writer) {
+  if (collection_type == PrimitiveType::TYPE_MAP) {
+    MapValueToJSON(collection_value, item_tuple_desc, writer);
+  } else {
+    DCHECK_EQ(collection_type, PrimitiveType::TYPE_ARRAY);
+    ArrayValueToJSON(collection_value, item_tuple_desc, writer);
+  }
+}
+
+template <class JsonStream>
+void ComplexValueWriter<JsonStream>::StructValToJSON(const StructVal& struct_val,
+    const ColumnType& column_type, rapidjson::Writer<JsonStream>* writer) {
+  DCHECK(column_type.type == TYPE_STRUCT);
+  DCHECK_EQ(struct_val.num_children, column_type.children.size());
+  writer->StartObject();
+  for (int i = 0; i < struct_val.num_children; ++i) {
+    writer->String(column_type.field_names[i].c_str());
+    void* child = (void*)(struct_val.ptr[i]);
+    const ColumnType& child_type = column_type.children[i];
+    if (child == nullptr) {
+      WriteNull(writer);
+    } else if (child_type.IsStructType()) {
+      StructValToJSON(*((StructVal*)child), child_type, writer);
+    } else {
+      PrimitiveValueToJSON(child, child_type, false, writer);
+    }
+  }
+  writer->EndObject();
+}
+
+} // namespace impala
diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc
index 9a19753e9..32ca90e04 100644
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -40,17 +40,6 @@ constexpr float RawValue::CANONICAL_FLOAT_NAN;
 constexpr double RawValue::CANONICAL_DOUBLE_ZERO;
 constexpr float RawValue::CANONICAL_FLOAT_ZERO;
 
-namespace {
-
-// Top level null values are printed as "NULL"; collections and structs are printed in
-// JSON format, which requires "null".
-constexpr const char* NullLiteral(bool top_level) {
-  if (top_level) return "NULL";
-  return "null";
-}
-
-}
-
 void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type,
                                  stringstream* stream) {
   if (value == NULL) return;
@@ -425,80 +414,8 @@ template void RawValue::WritePrimitive<false>(const void* value, Tuple* tuple,
 bool PrintNestedValueIfNull(const SlotDescriptor& slot_desc, Tuple* item,
     stringstream* stream) {
   bool is_null = item->IsNull(slot_desc.null_indicator_offset());
-  if (is_null) *stream << NullLiteral(false);
+  if (is_null) *stream << RawValue::NullLiteral(false);
   return is_null;
 }
 
-void PrintNonNullNestedCollection(const SlotDescriptor& slot_desc, Tuple* item, int scale,
-    stringstream* stream) {
-  const CollectionValue* nested_collection_val =
-      item->GetCollectionSlot(slot_desc.tuple_offset());
-  DCHECK(nested_collection_val != nullptr);
-  const TupleDescriptor* child_item_tuple_desc =
-      slot_desc.children_tuple_descriptor();
-  DCHECK(child_item_tuple_desc != nullptr);
-  RawValue::PrintCollectionValue(nested_collection_val, child_item_tuple_desc, scale,
-      stream, slot_desc.type().IsMapType());
-}
-
-void PrintNonNullNestedPrimitive(const SlotDescriptor& slot_desc, Tuple* item, int scale,
-    stringstream* stream) {
-  RawValue::PrintValue(item->GetSlot(slot_desc.tuple_offset()), slot_desc.type(), scale,
-      stream, true);
-}
-
-void PrintNestedValue(const SlotDescriptor& slot_desc, Tuple* item, int scale,
-    stringstream* stream) {
-  bool is_null = PrintNestedValueIfNull(slot_desc, item, stream);
-  if (is_null) return;
-
-  if (slot_desc.type().IsCollectionType()) {
-    // The item is also an array or a map, recurse deeper if not NULL.
-    PrintNonNullNestedCollection(slot_desc, item, scale, stream);
-  } else if (!slot_desc.type().IsComplexType()) {
-    // The item is a scalar, print it with the usual PrintValue.
-    PrintNonNullNestedPrimitive(slot_desc, item, scale, stream);
-  } else {
-    DCHECK(false);
-  }
-}
-
-void RawValue::PrintCollectionValue(const CollectionValue* coll_val,
-    const TupleDescriptor* item_tuple_desc, int scale, stringstream *stream,
-    bool is_map) {
-  DCHECK(item_tuple_desc != nullptr);
-  if (coll_val == nullptr) {
-    // We only reach this code path if this is a top level collection. Otherwise
-    // PrintNestedValue() handles the printing of the NULL literal.
-    *stream << NullLiteral(true);
-    return;
-  }
-  int item_byte_size = item_tuple_desc->byte_size();
-
-  const vector<SlotDescriptor*>& slot_descs = item_tuple_desc->slots();
-  // TODO: This has to be changed once structs are supported too.
-  if (is_map) {
-    DCHECK(slot_descs.size() == 2);
-    DCHECK(slot_descs[0] != nullptr);
-    DCHECK(slot_descs[1] != nullptr);
-  } else {
-    DCHECK(slot_descs.size() == 1);
-    DCHECK(slot_descs[0] != nullptr);
-  }
-
-  *stream << (is_map ? "{" : "[");
-  for (int i = 0; i < coll_val->num_tuples; ++i) {
-    Tuple* item = reinterpret_cast<Tuple*>(coll_val->ptr + i * item_byte_size);
-
-    PrintNestedValue(*slot_descs[0], item, scale, stream);
-    if (is_map) {
-      *stream << ":";
-      PrintNestedValue(*slot_descs[1], item, scale, stream);
-    }
-
-    if (i < coll_val->num_tuples - 1) *stream << ",";
-  }
-  *stream << (is_map ? "}" : "]");
-}
-
 }
diff --git a/be/src/runtime/raw-value.h b/be/src/runtime/raw-value.h
index 8b99e7c25..b3edd9033 100644
--- a/be/src/runtime/raw-value.h
+++ b/be/src/runtime/raw-value.h
@@ -67,12 +67,6 @@ class RawValue {
     return str;
   }
 
-  /// Similar to PrintValue() but works with collection values.
-  /// Converts 'coll_val' collection into ascii and writes to 'stream'.
-  static void PrintCollectionValue(const CollectionValue* coll_val,
-      const TupleDescriptor* item_tuple_desc, int scale, std::stringstream *stream,
-      bool is_map);
-
   /// Writes the byte representation of a value to a stringstream character-by-character
   static void PrintValueAsBytes(const void* value, const ColumnType& type,
                                 std::stringstream* stream);
@@ -163,6 +157,12 @@ class RawValue {
   // Returns positive zero for floating point types.
   static inline const void* PositiveFloatingZero(const ColumnType& type);
 
+  // Top level null values are printed as "NULL"; collections and structs are printed in
+  // JSON format, which requires "null".
+  static constexpr const char* NullLiteral(bool top_level) {
+    return top_level ? "NULL" : "null";
+  }
+
 private:
   /// Recursive helper function for Write() to handle struct slots.
   template <bool COLLECT_STRING_VALS>
diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc
index ebb059662..be9a0f233 100644
--- a/be/src/service/hs2-util.cc
+++ b/be/src/service/hs2-util.cc
@@ -18,6 +18,7 @@
 #include "service/hs2-util.h"
 
 #include <sstream>
+
 #include <rapidjson/rapidjson.h>
 #include <rapidjson/stringbuffer.h>
 #include <rapidjson/writer.h>
@@ -27,6 +28,7 @@
 #include "exprs/scalar-expr-evaluator.h"
 #include "gen-cpp/TCLIService_constants.h"
 #include "runtime/date-value.h"
+#include "runtime/complex-value-writer.inline.h"
 #include "runtime/decimal-value.inline.h"
 #include "runtime/raw-value.inline.h"
 #include "runtime/row-batch.h"
@@ -367,43 +369,13 @@ static void DecimalExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
   }
 }
 
-// Gets a StructVal and puts it's JSON format into 'out_stream'. Uses 'column_type' to
-// figure out field names and types. This functions can call itself recursively in case
-// of nested structs.
-static void StructValToJSON(const StructVal& struct_val, const ColumnType& column_type,
-    rapidjson::Writer<rapidjson::StringBuffer>* writer) {
-  DCHECK(column_type.type == TYPE_STRUCT);
-  DCHECK_EQ(struct_val.num_children, column_type.children.size());
-  writer->StartObject();
-  for (int i = 0; i < struct_val.num_children; ++i) {
-    writer->String(column_type.field_names[i].c_str());
-    void* child = (void*)(struct_val.ptr[i]);
-    if (child == nullptr) {
-      writer->Null();
-    } else if (column_type.children[i].IsStructType()) {
-      StructValToJSON(*((StructVal*)child), column_type.children[i], writer);
-    } else {
-      string tmp;
-      RawValue::PrintValue(child, column_type.children[i], -1, &tmp);
-      const ColumnType& child_type = column_type.children[i];
-      if (child_type.IsStringType() || child_type.IsDateType() ||
-          child_type.IsTimestampType()) {
-        writer->String(tmp.c_str());
-      } else if (child_type.IsBooleanType()) {
-        writer->Bool( *(reinterpret_cast<bool*>(child)) );
-      } else {
-        writer->RawValue(tmp.c_str(), tmp.size(), rapidjson::kNumberType);
-      }
-    }
-  }
-  writer->EndObject();
-}
-
 static void StructExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) {
   DCHECK(type.types.size() > 1);
   ReserveSpace(num_rows, output_row_idx, &column->stringVal);
+  // The buffer used by rapidjson::Writer. We reuse it to eliminate allocations.
+  rapidjson::StringBuffer buffer;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     StructVal struct_val = expr_eval->GetStructVal(it.Get());
     if (struct_val.is_null) {
@@ -411,9 +383,12 @@ static void StructExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     } else {
       int idx = 0;
       ColumnType column_type(type.types, &idx);
-      rapidjson::StringBuffer buffer;
+
+      buffer.Clear();
       rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
-      StructValToJSON(struct_val, column_type, &writer);
+
+      ComplexValueWriter<rapidjson::StringBuffer>::StructValToJSON(
+          struct_val, column_type, &writer);
       column->stringVal.values.emplace_back(buffer.GetString());
     }
     SetNullBit(output_row_idx, struct_val.is_null, &column->stringVal.nulls);
@@ -423,10 +398,17 @@ static void StructExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
 
 static void CollectionExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
-    uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column,
-    bool is_map) {
+    uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) {
   DCHECK(type.types.size() > 1);
+  TTypeNodeType::type coll_thrift_type = type.types[0].type;
+  DCHECK(coll_thrift_type == TTypeNodeType::ARRAY ||
+      coll_thrift_type == TTypeNodeType::MAP);
+  PrimitiveType coll_impala_type = coll_thrift_type == TTypeNodeType::ARRAY ?
+      PrimitiveType::TYPE_ARRAY : PrimitiveType::TYPE_MAP;
+
   ReserveSpace(num_rows, output_row_idx, &column->stringVal);
+  // The buffer used by rapidjson::Writer. We reuse it to eliminate allocations.
+  rapidjson::StringBuffer buffer;
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     CollectionVal coll_val = expr_eval->GetCollectionVal(it.Get());
     if (coll_val.is_null) {
@@ -438,10 +420,13 @@ static void CollectionExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
       const TupleDescriptor* item_tuple_desc = scalar_expr.GetCollectionTupleDesc();
       DCHECK(item_tuple_desc != nullptr);
       CollectionValue value(coll_val);
-      // TODO: use rapidjson as in for structs
-      stringstream stream;
-      RawValue::PrintCollectionValue(&value, item_tuple_desc, -1, &stream, is_map);
-      column->stringVal.values.emplace_back(stream.str());
+
+      buffer.Clear();
+      rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+
+      ComplexValueWriter<rapidjson::StringBuffer>::CollectionValueToJSON(
+          value, coll_impala_type, item_tuple_desc, &writer);
+      column->stringVal.values.emplace_back(buffer.GetString());
     }
     SetNullBit(output_row_idx, coll_val.is_null, &column->stringVal.nulls);
     ++output_row_idx;
@@ -462,12 +447,9 @@ void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
           expr_eval, type, batch, start_idx, num_rows, output_row_idx, column);
       return;
     case TTypeNodeType::ARRAY:
-      CollectionExprValuesToHS2TColumn(
-          expr_eval, type, batch, start_idx, num_rows, output_row_idx, column, false);
-      return;
     case TTypeNodeType::MAP:
       CollectionExprValuesToHS2TColumn(
-          expr_eval, type, batch, start_idx, num_rows, output_row_idx, column, true);
+          expr_eval, type, batch, start_idx, num_rows, output_row_idx, column);
       return;
     default:
       break;
diff --git a/be/src/service/query-result-set.cc b/be/src/service/query-result-set.cc
index c67f416eb..0897a4d53 100644
--- a/be/src/service/query-result-set.cc
+++ b/be/src/service/query-result-set.cc
@@ -20,9 +20,14 @@
 #include <sstream>
 #include <boost/scoped_ptr.hpp>
 
+#include <rapidjson/ostreamwrapper.h>
+#include <rapidjson/rapidjson.h>
+#include <rapidjson/writer.h>
+
 #include "exprs/scalar-expr-evaluator.h"
 #include "exprs/slot-ref.h"
 #include "rpc/thrift-util.h"
+#include "runtime/complex-value-writer.inline.h"
 #include "runtime/descriptors.h"
 #include "runtime/raw-value.h"
 #include "runtime/row-batch.h"
@@ -198,8 +203,7 @@ Status AsciiQueryResultSet::AddRows(const vector<ScalarExprEvaluator*>& expr_eva
       } else if (metadata_.columns[i].columnType.types.size() > 1) {
         ColumnType col_type = ColumnType::FromThrift(metadata_.columns[i].columnType);
         DCHECK(col_type.IsArrayType() || col_type.IsMapType());
-        PrintCollectionValue(expr_evals[i], it.Get(), scales[i], &out_stream,
-            col_type.IsMapType());
+        PrintCollectionValue(expr_evals[i], it.Get(), &out_stream, col_type.type);
       } else {
         DCHECK(false);
       }
@@ -211,7 +215,9 @@ Status AsciiQueryResultSet::AddRows(const vector<ScalarExprEvaluator*>& expr_eva
 }
 
 void QueryResultSet::PrintCollectionValue(ScalarExprEvaluator* expr_eval,
-    const TupleRow* row, int scale, stringstream *stream, bool is_map) {
+    const TupleRow* row, stringstream *stream, PrimitiveType collection_type) {
+  DCHECK(collection_type == PrimitiveType::TYPE_ARRAY
+      || collection_type == PrimitiveType::TYPE_MAP);
   const ScalarExpr& scalar_expr = expr_eval->root();
   // Currently scalar_expr can be only a slot ref as no functions return arrays.
   DCHECK(scalar_expr.IsSlotRef());
@@ -220,7 +226,18 @@ void QueryResultSet::PrintCollectionValue(ScalarExprEvaluator* expr_eval,
   const CollectionValue* array_val =
       static_cast<const CollectionValue*>(expr_eval->GetValue(row));
 
-  RawValue::PrintCollectionValue(array_val, item_tuple_desc, scale, stream, is_map);
+  if (array_val != nullptr) {
+    rapidjson::BasicOStreamWrapper<stringstream> wrapped_stream(*stream);
+    rapidjson::Writer<rapidjson::BasicOStreamWrapper<stringstream>> writer(
+        wrapped_stream);
+
+    ComplexValueWriter<rapidjson::BasicOStreamWrapper<stringstream>>
+        ::CollectionValueToJSON(*array_val,
+            collection_type,
+            item_tuple_desc, &writer);
+  } else {
+    (*stream) << RawValue::NullLiteral(/*top-level*/ true);
+  }
 }
 
 Status AsciiQueryResultSet::AddOneRow(const TResultRow& row) {
diff --git a/be/src/service/query-result-set.h b/be/src/service/query-result-set.h
index 176e18c48..32f36fd73 100644
--- a/be/src/service/query-result-set.h
+++ b/be/src/service/query-result-set.h
@@ -23,6 +23,7 @@
 #include "gen-cpp/Results_types.h"
 #include "gen-cpp/TCLIService_types.h"
 #include "runtime/runtime-state.h"
+#include "runtime/types.h"
 
 #include <vector>
 #include <sstream>
@@ -78,10 +79,10 @@ class QueryResultSet {
       apache::hive::service::cli::thrift::TRowSet* rowset);
 
 protected:
-  /// Wrapper to call RawValue::PrintCollectionValue for a given collection column.
-  /// expr_eval must be a SlotRef on a collection slot.
+  /// Wrapper to callComplexValueWriter::CollectionValueToJSON() for a given collection
+  /// column. expr_eval must be a SlotRef on a collection slot.
   static void PrintCollectionValue(ScalarExprEvaluator* expr_eval, const TupleRow* row,
-      int scale, std::stringstream *stream, bool is_map);
+      std::stringstream *stream, PrimitiveType collection_type);
 
 };
 }
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 5f52e6d51..7ada201b5 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3700,9 +3700,21 @@ map_1d MAP<INT, STRING>
 map_2d MAP<INT,MAP<INT,STRING>>
 map_3d MAP<INT,MAP<INT,MAP<INT,STRING>>>
 map_map_array MAP<INT,MAP<INT,ARRAY<INT>>>
+map_bool_key MAP<BOOLEAN, STRING>
+map_tinyint_key MAP<TINYINT, STRING>
+map_smallint_key MAP<SMALLINT, STRING>
+map_bigint_key MAP<BIGINT, STRING>
+map_float_key MAP<FLOAT, STRING>
+map_double_key MAP<DOUBLE, STRING>
+map_decimal_key MAP<DECIMAL(2,1), STRING>
+map_string_key MAP<STRING, INT>
+map_char_key MAP<CHAR(3), INT>
+map_varchar_key MAP<VARCHAR(3), STRING>
+map_timestamp_key MAP<TIMESTAMP, STRING>
+map_date_key MAP<DATE, STRING>
 ---- DEPENDENT_LOAD_HIVE
 -- It would be nice to insert NULLs, but I couldn't find a way in Hive.
-INSERT INTO {db_name}{db_suffix}.{table_name} VALUES
+INSERT OVERWRITE {db_name}{db_suffix}.{table_name} VALUES
   (1,
    array(1, 2, NULL),
    array(array(1, 2, NULL), array(3)),
@@ -3719,7 +3731,65 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES
    map(
        1, map(10, array(100, 200), 20, array(300, 400)),
        2, map(30, array(500, 600), 40, array(700, 800))
-   )
+   ),
+   map(true, "true", false, "false"),
+   map(-1Y, "a", 0Y, "b", 1Y, "c"),
+   map(-1S, "a", 0S, "b", 1S, "c"),
+   map(-1L, "a", 0L, "b", 1L, "c"),
+   map(cast(-1.5 as FLOAT), "a", cast(0.25 as FLOAT), "b", cast(1.75 as FLOAT), "c"),
+   map(cast(-1.5 as DOUBLE), "a", cast(0.25 as DOUBLE), "b", cast(1.75 as DOUBLE), "c"),
+   map(-1.8, "a", 0.2, "b", 1.2, "c"),
+   map("one", 1, "two", 2, "three", 3),
+   map(cast("Mon" as CHAR(3)), 1,
+       cast("Tue" as CHAR(3)), 2,
+       cast("Wed" as CHAR(3)), 3,
+       cast("Thu" as CHAR(3)), 4,
+       cast("Fri" as CHAR(3)), 5,
+       cast("Sat" as CHAR(3)), 6,
+       cast("Sun" as CHAR(3)), 7
+      ),
+   map(cast("a" as VARCHAR(3)), "A", cast("ab" as VARCHAR(3)), "AB", cast("abc" as VARCHAR(3)), "ABC"),
+   map(to_utc_timestamp("2022-12-10 08:15:12", "UTC"), "Saturday morning",
+       to_utc_timestamp("2022-12-09 18:15:12", "UTC"), "Friday evening"),
+   map(to_date("2022-12-10"), "Saturday", to_date("2022-12-09"), "Friday")
+  );
+---- LOAD
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
+map_null_keys
+---- COLUMNS
+id INT
+map_bool_key MAP<BOOLEAN, STRING>
+map_tinyint_key MAP<TINYINT, STRING>
+map_smallint_key MAP<SMALLINT, STRING>
+map_bigint_key MAP<BIGINT, STRING>
+map_float_key MAP<FLOAT, STRING>
+map_double_key MAP<DOUBLE, STRING>
+map_decimal_key MAP<DECIMAL(2,1), STRING>
+map_string_key MAP<STRING, INT>
+map_char_key MAP<CHAR(3), INT>
+map_varchar_key MAP<VARCHAR(3), STRING>
+map_timestamp_key MAP<TIMESTAMP, STRING>
+map_date_key MAP<DATE, STRING>
+---- DEPENDENT_LOAD_HIVE
+INSERT OVERWRITE {db_name}{db_suffix}.{table_name} VALUES
+  (1,
+   map(true, "true", if(false, false, NULL), "null"),
+   map(-1Y, "one", if(false, 1Y, NULL), "null"),
+   map(-1S, "one", if(false, 1S, NULL), "null"),
+   map(-1L, "one", if(false, 1L, NULL), "null"),
+   map(cast(-1.75 as FLOAT), "a", if(false, cast(1.5 as FLOAT), NULL), "null"),
+   map(cast(-1.75 as DOUBLE), "a", if(false, cast(1.5 as DOUBLE), NULL), "null"),
+   map(-1.8, "a",if(false, 1.5, NULL), "null"),
+   map("one", 1, if(false, "", NULL), NULL),
+   map(cast("Mon" as CHAR(3)), 1,
+       if(false, cast("NUL" as CHAR(3)), NULL), NULL),
+   map(cast("a" as VARCHAR(3)), "A", if(false, cast("" as VARCHAR(3)), NULL), NULL),
+   map(to_utc_timestamp("2022-12-10 08:15:12", "UTC"), "Saturday morning",
+       if(false, to_utc_timestamp("2022-12-10 08:15:12", "UTC"), NULL), "null"),
+   map(to_date("2022-12-10"), "Saturday", if(false, to_date("2022-12-10"), NULL), "null")
   );
 ---- LOAD
 ====
diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv
index dee5088e4..20213a95b 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -346,9 +346,13 @@ table_name:alltypessmall_bool_sorted, constraint:restrict_to, table_format:orc/d
 
 table_name:complextypes_arrays_only_view, constraint:restrict_to, table_format:parquet/none/none
 table_name:complextypes_arrays_only_view, constraint:restrict_to, table_format:orc/def/block
+
 table_name:collection_tbl, constraint:restrict_to, table_format:parquet/none/none
 table_name:collection_tbl, constraint:restrict_to, table_format:orc/def/block
 
+# In parquet we can't have NULL map keys but in ORC we can.
+table_name:map_null_keys, constraint:restrict_to, table_format:orc/def/block
+
 table_name:complextypes_maps_view, constraint:restrict_to, table_format:parquet/none/none
 table_name:complextypes_maps_view, constraint:restrict_to, table_format:orc/def/block
 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/map_null_keys.test b/testdata/workloads/functional-query/queries/QueryTest/map_null_keys.test
new file mode 100644
index 000000000..e835742a6
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/map_null_keys.test
@@ -0,0 +1,24 @@
+=====
+---- QUERY
+-- Test that NULL map keys are printed correctly.
+set CONVERT_LEGACY_HIVE_PARQUET_UTC_TIMESTAMPS=1;
+select
+ id,
+ map_bool_key,
+ map_tinyint_key,
+ map_smallint_key,
+ map_bigint_key,
+ map_float_key,
+ map_double_key,
+ map_decimal_key,
+ map_string_key,
+ map_char_key,
+ map_varchar_key,
+ map_timestamp_key,
+ map_date_key
+from map_null_keys;
+---- RESULTS
+1,'{true:"true",null:"null"}','{-1:"one",null:"null"}','{-1:"one",null:"null"}','{-1:"one",null:"null"}','{-1.75:"a",null:"null"}','{-1.75:"a",null:"null"}','{-1.8:"a",null:"null"}','{"one":1,null:null}','{"Mon":1,null:null}','{"a":"A",null:null}','{"2022-12-10 08:15:12":"Saturday morning",null:"null"}','{"2022-12-10":"Saturday",null:"null"}'
+---- TYPES
+INT,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+=====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
index 326c6ae91..f611605a7 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-map-in-select-list.test
@@ -324,3 +324,25 @@ from collection_tbl c, c.map_map_array mma, mma.value ma;
 ---- TYPES
 INT,STRING,STRING,STRING,STRING
 =====
+---- QUERY
+-- Test that map keys are printed correctly.
+set CONVERT_LEGACY_HIVE_PARQUET_UTC_TIMESTAMPS=1;
+select
+ map_bool_key,
+ map_tinyint_key,
+ map_smallint_key,
+ map_bigint_key,
+ map_float_key,
+ map_double_key,
+ map_decimal_key,
+ map_string_key,
+ map_char_key,
+ map_varchar_key,
+ map_timestamp_key,
+ map_date_key
+from collection_tbl;
+---- RESULTS
+'{true:"true",false:"false"}','{-1:"a",0:"b",1:"c"}','{-1:"a",0:"b",1:"c"}','{-1:"a",0:"b",1:"c"}','{-1.5:"a",0.25:"b",1.75:"c"}','{-1.5:"a",0.25:"b",1.75:"c"}','{-1.8:"a",0.2:"b",1.2:"c"}','{"one":1,"two":2,"three":3}','{"Mon":1,"Tue":2,"Wed":3,"Thu":4,"Fri":5,"Sat":6,"Sun":7}','{"a":"A","ab":"AB","abc":"ABC"}','{"2022-12-10 08:15:12":"Saturday morning","2022-12-09 18:15:12":"Friday evening"}','{"2022-12-10":"Saturday","2022-12-09":"Friday"}'
+---- TYPES
+STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING
+=====
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 1603c369b..b45a4e2cb 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -180,6 +180,12 @@ class TestNestedCollectionsInSelectList(ImpalaTestSuite):
     """Queries where a map column is in the select list"""
     self.run_test_case('QueryTest/nested-map-in-select-list', vector)
 
+  def test_map_null_keys(self, vector, unique_database):
+    """Queries where a map has null keys. Is only possible in ORC, not Parquet."""
+    if vector.get_value('table_format').file_format == 'parquet':
+      pytest.skip()
+    self.run_test_case('QueryTest/map_null_keys', vector)
+
 
 # Moved this to a separate test class from TestNestedTypesInSelectList because this needs
 # a narrower test vector.