You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/23 05:23:35 UTC

[orc] branch main updated: ORC-959: [C++] Fix crash of SargsApplier in finding nested field names (#877)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 6bcbeec  ORC-959: [C++] Fix crash of SargsApplier in finding nested field names (#877)
6bcbeec is described below

commit 6bcbeec158337486a7ada58e2fb0924ba0e18014
Author: Quanlong Huang <hu...@gmail.com>
AuthorDate: Mon Aug 23 13:23:28 2021 +0800

    ORC-959: [C++] Fix crash of SargsApplier in finding nested field names (#877)
    
    ### What changes were proposed in this pull request?
    
    This patch fixes a crash in the C++ reader when SearchArgument is created on nested columns of collection types (e.g. array/map). Only STRUCT type has field names for subtypes. So we should check the current node type before checking any field names. JIRA description contains details on how to reproduce the crash.
    
    ### Why are the changes needed?
    
    Same as the above.
    
    ### How was this patch tested?
    
    Added unit tests in c++/test/TestSargsApplier.cc to reproduce the crash and verify the fix on ARRAY and MAP types.
---
 c++/src/sargs/SargsApplier.cc |  3 ++-
 c++/src/sargs/SargsApplier.hh |  2 ++
 c++/test/TestSargsApplier.cc  | 24 ++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/c++/src/sargs/SargsApplier.cc b/c++/src/sargs/SargsApplier.cc
index af709ab..85db410 100644
--- a/c++/src/sargs/SargsApplier.cc
+++ b/c++/src/sargs/SargsApplier.cc
@@ -25,7 +25,8 @@ namespace orc {
   uint64_t SargsApplier::findColumn(const Type& type,
                                     const std::string& colName) {
     for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) {
-      if (type.getFieldName(i) == colName) {
+      // Only STRUCT type has field names
+      if (type.getKind() == STRUCT && type.getFieldName(i) == colName) {
         return type.getSubtype(i)->getColumnId();
       } else {
         uint64_t ret = findColumn(*type.getSubtype(i), colName);
diff --git a/c++/src/sargs/SargsApplier.hh b/c++/src/sargs/SargsApplier.hh
index cc6db92..39650f1 100644
--- a/c++/src/sargs/SargsApplier.hh
+++ b/c++/src/sargs/SargsApplier.hh
@@ -82,6 +82,8 @@ namespace orc {
 
   private:
     friend class TestSargsApplier_findColumnTest_Test;
+    friend class TestSargsApplier_findArrayColumnTest_Test;
+    friend class TestSargsApplier_findMapColumnTest_Test;
     static uint64_t findColumn(const Type& type, const std::string& colName);
 
   private:
diff --git a/c++/test/TestSargsApplier.cc b/c++/test/TestSargsApplier.cc
index 85dd73d..1516358 100644
--- a/c++/test/TestSargsApplier.cc
+++ b/c++/test/TestSargsApplier.cc
@@ -33,6 +33,30 @@ namespace orc {
       SargsApplier::findColumn(*type, "b"));
   }
 
+  TEST(TestSargsApplier, findArrayColumnTest) {
+    auto type = std::unique_ptr<Type>(Type::buildTypeFromString(
+      "struct<a:int,c:string,e:array<struct<f:bigint,g:double>>>"));
+    EXPECT_EQ(1, SargsApplier::findColumn(*type, "a"));
+    EXPECT_EQ(2, SargsApplier::findColumn(*type, "c"));
+    EXPECT_EQ(3, SargsApplier::findColumn(*type, "e"));
+    EXPECT_EQ(5, SargsApplier::findColumn(*type, "f"));
+    EXPECT_EQ(6, SargsApplier::findColumn(*type, "g"));
+    EXPECT_EQ(std::numeric_limits<uint64_t>::max(),
+      SargsApplier::findColumn(*type, "b"));
+  }
+
+  TEST(TestSargsApplier, findMapColumnTest) {
+    auto type = std::unique_ptr<Type>(Type::buildTypeFromString(
+      "struct<a:int,c:string,e:map<int,struct<f:bigint,g:double>>>"));
+    EXPECT_EQ(1, SargsApplier::findColumn(*type, "a"));
+    EXPECT_EQ(2, SargsApplier::findColumn(*type, "c"));
+    EXPECT_EQ(3, SargsApplier::findColumn(*type, "e"));
+    EXPECT_EQ(6, SargsApplier::findColumn(*type, "f"));
+    EXPECT_EQ(7, SargsApplier::findColumn(*type, "g"));
+    EXPECT_EQ(std::numeric_limits<uint64_t>::max(),
+      SargsApplier::findColumn(*type, "b"));
+  }
+
   static proto::ColumnStatistics createIntStats(
     int64_t min, int64_t max, bool hasNull = false) {
     proto::ColumnStatistics statistics;