You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/24 15:39:20 UTC

[orc] branch branch-1.7 updated: ORC-960: [C++] Support creating SearchArguments using column ids (#879)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new c783441  ORC-960: [C++] Support creating SearchArguments using column ids (#879)
c783441 is described below

commit c7834418145b30081b2ae3c0fbf16f56b0bc29f4
Author: Quanlong Huang <hu...@gmail.com>
AuthorDate: Tue Aug 24 23:37:18 2021 +0800

    ORC-960: [C++] Support creating SearchArguments using column ids (#879)
    
    <!--
    Thanks for sending a pull request!  Here are some tips for you:
      1. File a JIRA issue first and use it as a prefix of your PR title, e.g., `ORC-001: Fix ABC`.
      2. Use your PR title to summarize what this PR proposes instead of describing the problem.
      3. Make PR title and description complete because these will be the permanent commit log.
      4. If possible, provide a concise and reproducible example to reproduce the issue for a faster review.
      5. If the PR is unfinished, use GitHub PR Draft feature.
    -->
    
    ### What changes were proposed in this pull request?
    
    Currently in the C++ reader, SearchArgumentBuilder only provides interfaces for creating SearchArguments using field column names. Column names can be ambiguous if there are nested struct columns using the same names. Array item columns and map key/value columns don't even have names.
    
    This patch adds corresponding interfaces to create SearchArguments using column ids. It also refactors some code using templates to avoid duplication.
    
    ### Why are the changes needed?
    
    As described above and in the JIRA description, we need to be able to create SearchArguments using column ids.
    
    ### How was this patch tested?
    
    Added end-to-end tests in c++/test/TestPredicatePushdown.cc.
    
    (cherry picked from commit 2143841e24abb2e0fef1a3396376682fc3bb6fea)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 c++/include/orc/sargs/SearchArgument.hh |  91 +++++++++++++++++++--
 c++/src/sargs/PredicateLeaf.cc          |  98 +++++++++++++++++------
 c++/src/sargs/PredicateLeaf.hh          |  24 ++++++
 c++/src/sargs/SargsApplier.cc           |   6 +-
 c++/src/sargs/SearchArgument.cc         | 126 +++++++++++++++++++++++------
 c++/src/sargs/SearchArgument.hh         | 113 ++++++++++++++++++++++++--
 c++/test/TestPredicatePushdown.cc       | 136 ++++++++++++++++++++++----------
 7 files changed, 487 insertions(+), 107 deletions(-)

diff --git a/c++/include/orc/sargs/SearchArgument.hh b/c++/include/orc/sargs/SearchArgument.hh
index fe39e62..48796cb 100644
--- a/c++/include/orc/sargs/SearchArgument.hh
+++ b/c++/include/orc/sargs/SearchArgument.hh
@@ -82,7 +82,7 @@ namespace orc {
 
     /**
      * Add a less than leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -92,8 +92,19 @@ namespace orc {
                                             Literal literal) = 0;
 
     /**
+     * Add a less than leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    virtual SearchArgumentBuilder& lessThan(uint64_t columnId,
+                                            PredicateDataType type,
+                                            Literal literal) = 0;
+
+    /**
      * Add a less than equals leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -103,8 +114,19 @@ namespace orc {
                                                   Literal literal) = 0;
 
     /**
+     * Add a less than equals leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    virtual SearchArgumentBuilder& lessThanEquals(uint64_t columnId,
+                                                  PredicateDataType type,
+                                                  Literal literal) = 0;
+
+    /**
      * Add an equals leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -114,8 +136,19 @@ namespace orc {
                                           Literal literal) = 0;
 
     /**
+     * Add an equals leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    virtual SearchArgumentBuilder& equals(uint64_t columnId,
+                                          PredicateDataType type,
+                                          Literal literal) = 0;
+
+    /**
      * Add a null safe equals leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -125,8 +158,19 @@ namespace orc {
                                                   Literal literal) = 0;
 
     /**
+     * Add a null safe equals leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    virtual SearchArgumentBuilder& nullSafeEquals(uint64_t columnId,
+                                                  PredicateDataType type,
+                                                  Literal literal) = 0;
+
+    /**
      * Add an in leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literals the literals
      * @return this
@@ -136,8 +180,19 @@ namespace orc {
                                       const std::initializer_list<Literal>& literals) = 0;
 
     /**
+     * Add an in leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literals the literals
+     * @return this
+     */
+    virtual SearchArgumentBuilder& in(uint64_t columnId,
+                                      PredicateDataType type,
+                                      const std::initializer_list<Literal>& literals) = 0;
+
+    /**
      * Add an is null leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @return this
      */
@@ -145,8 +200,17 @@ namespace orc {
                                           PredicateDataType type) = 0;
 
     /**
+     * Add an is null leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @return this
+     */
+    virtual SearchArgumentBuilder& isNull(uint64_t columnId,
+                                          PredicateDataType type) = 0;
+
+    /**
      * Add a between leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param lower the literal
      * @param upper the literal
@@ -158,6 +222,19 @@ namespace orc {
                                            Literal upper) = 0;
 
     /**
+     * Add a between leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param lower the literal
+     * @param upper the literal
+     * @return this
+     */
+    virtual SearchArgumentBuilder& between(uint64_t columnId,
+                                           PredicateDataType type,
+                                           Literal lower,
+                                           Literal upper) = 0;
+
+    /**
      * Add a truth value to the expression.
      * @param truth truth value
      * @return this
diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc
index 5fd0bdb..caea93f 100644
--- a/c++/src/sargs/PredicateLeaf.cc
+++ b/c++/src/sargs/PredicateLeaf.cc
@@ -34,7 +34,22 @@ namespace orc {
                                Literal literal)
                               : mOperator(op)
                               , mType(type)
-                              , mColumnName(colName) {
+                              , mColumnName(colName)
+                              , mHasColumnName(true)
+                              , mColumnId(0) {
+    mLiterals.emplace_back(literal);
+    mHashCode = hashCode();
+    validate();
+  }
+
+  PredicateLeaf::PredicateLeaf(Operator op,
+                               PredicateDataType type,
+                               uint64_t columnId,
+                               Literal literal)
+                              : mOperator(op)
+                              , mType(type)
+                              , mHasColumnName(false)
+                              , mColumnId(columnId) {
     mLiterals.emplace_back(literal);
     mHashCode = hashCode();
     validate();
@@ -47,17 +62,37 @@ namespace orc {
                               : mOperator(op)
                               , mType(type)
                               , mColumnName(colName)
+                              , mHasColumnName(true)
+                              , mLiterals(literals.begin(), literals.end()) {
+    mHashCode = hashCode();
+    validate();
+  }
+
+  PredicateLeaf::PredicateLeaf(Operator op,
+                               PredicateDataType type,
+                               uint64_t columnId,
+                               const std::initializer_list<Literal>& literals)
+                              : mOperator(op)
+                              , mType(type)
+                              , mHasColumnName(false)
+                              , mColumnId(columnId)
                               , mLiterals(literals.begin(), literals.end()) {
     mHashCode = hashCode();
     validate();
   }
 
+  void PredicateLeaf::validateColumn() const {
+    if (mHasColumnName && mColumnName.empty()) {
+      throw std::invalid_argument("column name should not be empty");
+    } else if (!mHasColumnName && mColumnId == INVALID_COLUMN_ID) {
+      throw std::invalid_argument("invalid column id");
+    }
+  }
+
   void PredicateLeaf::validate() const {
     switch (mOperator) {
       case Operator::IS_NULL:
-        if (mColumnName.empty()) {
-          throw std::invalid_argument("column name should not be empty");
-        }
+        validateColumn();
         if (!mLiterals.empty()) {
           throw std::invalid_argument("No literal is required!");
         }
@@ -66,9 +101,7 @@ namespace orc {
       case Operator::NULL_SAFE_EQUALS:
       case Operator::LESS_THAN:
       case Operator::LESS_THAN_EQUALS:
-        if (mColumnName.empty()) {
-          throw std::invalid_argument("column name should not be empty");
-        }
+        validateColumn();
         if (mLiterals.size() != 1) {
           throw std::invalid_argument("One literal is required!");
         }
@@ -78,9 +111,7 @@ namespace orc {
         }
         break;
       case Operator::IN:
-        if (mColumnName.empty()) {
-          throw std::invalid_argument("column name should not be empty");
-        }
+        validateColumn();
         if (mLiterals.size() < 2) {
           throw std::invalid_argument("At least two literals are required!");
         }
@@ -91,9 +122,7 @@ namespace orc {
         }
         break;
       case Operator::BETWEEN:
-        if (mColumnName.empty()) {
-          throw std::invalid_argument("column name should not be empty");
-        }
+        validateColumn();
         for (auto literal : mLiterals) {
           if (static_cast<int>(literal.getType()) != static_cast<int>(mType)) {
             throw std::invalid_argument("leaf and literal types do not match!");
@@ -113,6 +142,10 @@ namespace orc {
     return mType;
   }
 
+  bool PredicateLeaf::hasColumnName() const {
+    return mHasColumnName;
+  }
+
   /**
    * Get the simple column name.
    */
@@ -120,6 +153,10 @@ namespace orc {
     return mColumnName;
   }
 
+  uint64_t PredicateLeaf::getColumnId() const {
+    return mColumnId;
+  }
+
   /**
    * Get the literal half of the predicate leaf.
    */
@@ -151,35 +188,42 @@ namespace orc {
     return sstream.str();
   }
 
+  std::string PredicateLeaf::columnDebugString() const {
+    if (mHasColumnName) return mColumnName;
+    std::ostringstream sstream;
+    sstream << "column(id=" << mColumnId << ')';
+    return sstream.str();
+  }
+
   std::string PredicateLeaf::toString() const {
     std::ostringstream sstream;
     sstream << '(';
     switch (mOperator) {
       case Operator::IS_NULL:
-        sstream << mColumnName << " is null";
+        sstream << columnDebugString() << " is null";
         break;
       case Operator::EQUALS:
-        sstream << mColumnName << " = " << getLiteralString(mLiterals);
+        sstream << columnDebugString() << " = " << getLiteralString(mLiterals);
         break;
       case Operator::NULL_SAFE_EQUALS:
-        sstream << mColumnName << " null_safe_= "
+        sstream << columnDebugString() << " null_safe_= "
                 << getLiteralString(mLiterals);
         break;
       case Operator::LESS_THAN:
-        sstream << mColumnName << " < " << getLiteralString(mLiterals);
+        sstream << columnDebugString() << " < " << getLiteralString(mLiterals);
         break;
       case Operator::LESS_THAN_EQUALS:
-        sstream << mColumnName << " <= " << getLiteralString(mLiterals);
+        sstream << columnDebugString() << " <= " << getLiteralString(mLiterals);
         break;
       case Operator::IN:
-        sstream << mColumnName << " in " << getLiteralsString(mLiterals);
+        sstream << columnDebugString() << " in " << getLiteralsString(mLiterals);
         break;
       case Operator::BETWEEN:
-        sstream << mColumnName << " between " << getLiteralsString(mLiterals);
+        sstream << columnDebugString() << " between " << getLiteralsString(mLiterals);
         break;
       default:
-        sstream << "unknown operator, colName: "
-                << mColumnName << ", literals: "
+        sstream << "unknown operator, column: "
+                << columnDebugString() << ", literals: "
                 << getLiteralsString(mLiterals);
     }
     sstream << ')';
@@ -192,19 +236,21 @@ namespace orc {
       [&](const Literal& literal) {
       value = value * 17 + literal.getHashCode();
     });
+    auto colHash = mHasColumnName ?
+        std::hash<std::string>{}(mColumnName) :
+        std::hash<uint64_t>{}(mColumnId);
     return value * 103 * 101 * 3 * 17 +
       std::hash<int>{}(static_cast<int>(mOperator)) +
       std::hash<int>{}(static_cast<int>(mType)) * 17 +
-      std::hash<std::string>{}(mColumnName) * 3 * 17 +
-      std::hash<uint64_t>{}(mColumnId) * 3 * 17 * 17;
+      colHash * 3 * 17;
   }
 
   bool PredicateLeaf::operator==(const PredicateLeaf& r) const {
     if (this == &r) {
       return true;
     }
-    if (mHashCode != r.mHashCode || mType != r.mType ||
-        mOperator != r.mOperator || mColumnName != r.mColumnName ||
+    if (mHashCode != r.mHashCode || mType != r.mType || mOperator != r.mOperator ||
+        mHasColumnName != r.mHasColumnName || mColumnName != r.mColumnName ||
         mColumnId != r.mColumnId || mLiterals.size() != r.mLiterals.size()) {
       return false;
     }
diff --git a/c++/src/sargs/PredicateLeaf.hh b/c++/src/sargs/PredicateLeaf.hh
index 92e9784..298470b 100644
--- a/c++/src/sargs/PredicateLeaf.hh
+++ b/c++/src/sargs/PredicateLeaf.hh
@@ -73,9 +73,19 @@ namespace orc {
 
     PredicateLeaf(Operator op,
                   PredicateDataType type,
+                  uint64_t columnId,
+                  Literal literal);
+
+    PredicateLeaf(Operator op,
+                  PredicateDataType type,
                   const std::string& colName,
                   const std::initializer_list<Literal>& literalList);
 
+    PredicateLeaf(Operator op,
+                  PredicateDataType type,
+                  uint64_t columnId,
+                  const std::initializer_list<Literal>& literalList);
+
     /**
      * Get the operator for the leaf.
      */
@@ -87,11 +97,21 @@ namespace orc {
     PredicateDataType getType() const;
 
     /**
+     * Get whether the predicate is created using column name.
+     */
+    bool hasColumnName() const;
+
+    /**
      * Get the simple column name.
      */
     const std::string& getColumnName() const;
 
     /**
+     * Get the column id.
+     */
+    uint64_t getColumnId() const;
+
+    /**
      * Get the literal half of the predicate leaf.
      */
     Literal getLiteral() const;
@@ -118,6 +138,9 @@ namespace orc {
     size_t hashCode() const;
 
     void validate() const;
+    void validateColumn() const;
+
+    std::string columnDebugString() const;
 
     TruthValue evaluatePredicateMinMax(
                                  const proto::ColumnStatistics& colStats) const;
@@ -129,6 +152,7 @@ namespace orc {
     Operator mOperator;
     PredicateDataType mType;
     std::string mColumnName;
+    bool mHasColumnName;
     uint64_t mColumnId;
     std::vector<Literal> mLiterals;
     size_t mHashCode;
diff --git a/c++/src/sargs/SargsApplier.cc b/c++/src/sargs/SargsApplier.cc
index 85db410..f99499e 100644
--- a/c++/src/sargs/SargsApplier.cc
+++ b/c++/src/sargs/SargsApplier.cc
@@ -54,7 +54,11 @@ namespace orc {
     const std::vector<PredicateLeaf>& leaves = sargs->getLeaves();
     mFilterColumns.resize(leaves.size(), INVALID_COLUMN_ID);
     for (size_t i = 0; i != mFilterColumns.size(); ++i) {
-      mFilterColumns[i] = findColumn(type, leaves[i].getColumnName());
+      if (leaves[i].hasColumnName()) {
+        mFilterColumns[i] = findColumn(type, leaves[i].getColumnName());
+      } else {
+        mFilterColumns[i] = leaves[i].getColumnId();
+      }
     }
   }
 
diff --git a/c++/src/sargs/SearchArgument.cc b/c++/src/sargs/SearchArgument.cc
index 38fa69f..400b40f 100644
--- a/c++/src/sargs/SearchArgument.cc
+++ b/c++/src/sargs/SearchArgument.cc
@@ -102,13 +102,22 @@ namespace orc {
     return result.first->second;
   }
 
+  bool SearchArgumentBuilderImpl::isInvalidColumn(const std::string& column) {
+    return column.empty();
+  }
+
+  bool SearchArgumentBuilderImpl::isInvalidColumn(uint64_t columnId) {
+    return columnId == INVALID_COLUMN_ID;
+  }
+
+  template<typename T>
   SearchArgumentBuilder&
   SearchArgumentBuilderImpl::compareOperator(PredicateLeaf::Operator op,
-                                             const std::string& column,
+                                             T column,
                                              PredicateDataType type,
                                              Literal literal) {
     TreeNode parent = mCurrTree.front();
-    if (column.empty()) {
+    if (isInvalidColumn(column)) {
       parent->addChild(
         std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
     } else {
@@ -125,16 +134,30 @@ namespace orc {
       PredicateLeaf::Operator::LESS_THAN, column, type, literal);
   }
 
-    SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThanEquals(const std::string& column,
-                                                                     PredicateDataType type,
-                                                                     Literal literal) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThan(uint64_t columnId,
+                                                             PredicateDataType type,
+                                                             Literal literal) {
+    return compareOperator(
+      PredicateLeaf::Operator::LESS_THAN, columnId, type, literal);
+  }
+
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThanEquals(const std::string& column,
+                                                                   PredicateDataType type,
+                                                                   Literal literal) {
     return compareOperator(
       PredicateLeaf::Operator::LESS_THAN_EQUALS, column, type, literal);
   }
 
-    SearchArgumentBuilder& SearchArgumentBuilderImpl::equals(const std::string& column,
-                                                             PredicateDataType type,
-                                                             Literal literal) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::lessThanEquals(uint64_t columnId,
+                                                                   PredicateDataType type,
+                                                                   Literal literal) {
+    return compareOperator(
+      PredicateLeaf::Operator::LESS_THAN_EQUALS, columnId, type, literal);
+  }
+
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::equals(const std::string& column,
+                                                           PredicateDataType type,
+                                                           Literal literal) {
     if (literal.isNull()) {
       return isNull(column, type);
     } else {
@@ -143,18 +166,37 @@ namespace orc {
     }
   }
 
-    SearchArgumentBuilder& SearchArgumentBuilderImpl::nullSafeEquals(const std::string& column,
-                                                                     PredicateDataType type,
-                                                                     Literal literal) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::equals(uint64_t columnId,
+                                                           PredicateDataType type,
+                                                           Literal literal) {
+    if (literal.isNull()) {
+      return isNull(columnId, type);
+    } else {
+      return compareOperator(
+        PredicateLeaf::Operator::EQUALS, columnId, type, literal);
+    }
+  }
+
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::nullSafeEquals(const std::string& column,
+                                                                   PredicateDataType type,
+                                                                   Literal literal) {
     return compareOperator(
       PredicateLeaf::Operator::NULL_SAFE_EQUALS, column, type, literal);
   }
 
-    SearchArgumentBuilder& SearchArgumentBuilderImpl::in(const std::string& column,
-                                                         PredicateDataType type,
-                                                         const std::initializer_list<Literal>& literals) {
-    TreeNode& parent = mCurrTree.front();
-    if (column.empty()) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::nullSafeEquals(uint64_t columnId,
+                                                                   PredicateDataType type,
+                                                                   Literal literal) {
+    return compareOperator(
+      PredicateLeaf::Operator::NULL_SAFE_EQUALS, columnId, type, literal);
+  }
+
+  template<typename T>
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIn(T column,
+                                                PredicateDataType type,
+                                                const std::initializer_list<Literal>& literals) {
+    TreeNode &parent = mCurrTree.front();
+    if (isInvalidColumn(column)) {
       parent->addChild(
         std::make_shared<ExpressionTree>((TruthValue::YES_NO_NULL)));
     } else {
@@ -169,10 +211,22 @@ namespace orc {
     return *this;
   }
 
-    SearchArgumentBuilder& SearchArgumentBuilderImpl::isNull(const std::string& column,
-                                                             PredicateDataType type) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(const std::string& column,
+                                                       PredicateDataType type,
+                                                       const std::initializer_list<Literal>& literals) {
+    return addChildForIn(column, type, literals);
+  }
+
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::in(uint64_t columnId,
+                                                       PredicateDataType type,
+                                                       const std::initializer_list<Literal>& literals) {
+    return addChildForIn(columnId, type, literals);
+  }
+
+  template<typename T>
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForIsNull(T column, PredicateDataType type) {
     TreeNode& parent = mCurrTree.front();
-    if (column.empty()) {
+    if (isInvalidColumn(column)) {
       parent->addChild(
         std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
     } else {
@@ -185,12 +239,22 @@ namespace orc {
     return *this;
   }
 
-    SearchArgumentBuilder& SearchArgumentBuilderImpl::between(const std::string& column,
-                                                              PredicateDataType type,
-                                                              Literal lower,
-                                                              Literal upper) {
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::isNull(const std::string& column,
+                                                           PredicateDataType type) {
+    return addChildForIsNull(column, type);
+  }
+
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::isNull(uint64_t columnId,
+                                                           PredicateDataType type) {
+    return addChildForIsNull(columnId, type);
+  }
+
+  template<typename T>
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::addChildForBetween(T column,
+                                                                       PredicateDataType type,
+                                                                       Literal lower, Literal upper) {
     TreeNode& parent = mCurrTree.front();
-    if (column.empty()) {
+    if (isInvalidColumn(column)) {
       parent->addChild(
         std::make_shared<ExpressionTree>(TruthValue::YES_NO_NULL));
     } else {
@@ -203,6 +267,20 @@ namespace orc {
     return *this;
   }
 
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::between(const std::string& column,
+                                                            PredicateDataType type,
+                                                            Literal lower,
+                                                            Literal upper) {
+    return addChildForBetween(column, type, lower, upper);
+  }
+
+  SearchArgumentBuilder& SearchArgumentBuilderImpl::between(uint64_t columnId,
+                                                            PredicateDataType type,
+                                                            Literal lower,
+                                                            Literal upper) {
+    return addChildForBetween(columnId, type, lower, upper);
+  }
+
   SearchArgumentBuilder& SearchArgumentBuilderImpl::literal(TruthValue truth) {
     TreeNode& parent = mCurrTree.front();
     parent->addChild(std::make_shared<ExpressionTree>(truth));
diff --git a/c++/src/sargs/SearchArgument.hh b/c++/src/sargs/SearchArgument.hh
index 8520b92..b2426f3 100644
--- a/c++/src/sargs/SearchArgument.hh
+++ b/c++/src/sargs/SearchArgument.hh
@@ -105,7 +105,7 @@ namespace orc {
 
     /**
      * Add a less than leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -115,8 +115,19 @@ namespace orc {
                                     Literal literal) override;
 
     /**
+     * Add a less than leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    SearchArgumentBuilder& lessThan(uint64_t columnId,
+                                    PredicateDataType type,
+                                    Literal literal) override;
+
+    /**
      * Add a less than equals leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -126,8 +137,19 @@ namespace orc {
                                           Literal literal) override;
 
     /**
+     * Add a less than equals leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    SearchArgumentBuilder& lessThanEquals(uint64_t columnId,
+                                          PredicateDataType type,
+                                          Literal literal) override;
+
+    /**
      * Add an equals leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -137,8 +159,19 @@ namespace orc {
                                   Literal literal) override;
 
     /**
+     * Add an equals leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    SearchArgumentBuilder& equals(uint64_t columnId,
+                                  PredicateDataType type,
+                                  Literal literal) override;
+
+    /**
      * Add a null safe equals leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literal the literal
      * @return this
@@ -148,8 +181,19 @@ namespace orc {
                                           Literal literal) override;
 
     /**
+     * Add a null safe equals leaf to the current item on the stack.
+     * @param columnId the column id of the column
+     * @param type the type of the expression
+     * @param literal the literal
+     * @return this
+     */
+    SearchArgumentBuilder& nullSafeEquals(uint64_t columnId,
+                                          PredicateDataType type,
+                                          Literal literal) override;
+
+    /**
      * Add an in leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param literals the literals
      * @return this
@@ -159,8 +203,19 @@ namespace orc {
                               const std::initializer_list<Literal>& literals) override;
 
     /**
+     * Add an in leaf to the current item on the stack.
+     * @param columnId the id of the column; use it when a field name would be ambiguous or absent
+     * @param type the type of the expression
+     * @param literals the candidate literals the column is tested against
+     * @return this builder, so that calls can be chained
+     */
+    SearchArgumentBuilder& in(uint64_t columnId,
+                              PredicateDataType type,
+                              const std::initializer_list<Literal>& literals) override;
+
+    /**
      * Add an is null leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @return this
      */
@@ -168,8 +223,17 @@ namespace orc {
                                   PredicateDataType type) override;
 
     /**
+     * Add an is null leaf to the current item on the stack.
+     * @param columnId the id of the column; use it when a field name would be ambiguous or absent
+     * @param type the type of the expression
+     * @return this builder, so that calls can be chained
+     */
+    SearchArgumentBuilder& isNull(uint64_t columnId,
+                                  PredicateDataType type) override;
+
+    /**
      * Add a between leaf to the current item on the stack.
-     * @param column the name of the column
+     * @param column the field name of the column
      * @param type the type of the expression
      * @param lower the literal
      * @param upper the literal
@@ -181,6 +245,19 @@ namespace orc {
                                    Literal upper) override;
 
     /**
+     * Add a between leaf to the current item on the stack.
+     * @param columnId the id of the column; use it when a field name would be ambiguous or absent
+     * @param type the type of the expression
+     * @param lower the literal at the lower end of the range
+     * @param upper the literal at the upper end of the range
+     * @return this builder, so that calls can be chained
+     */
+    SearchArgumentBuilder& between(uint64_t columnId,
+                                   PredicateDataType type,
+                                   Literal lower,
+                                   Literal upper) override;
+
+    /**
      * Add a truth value to the expression.
      * @param truth truth value
      * @return this
@@ -197,11 +274,31 @@ namespace orc {
   private:
     SearchArgumentBuilder& start(ExpressionTree::Operator op);
     size_t addLeaf(PredicateLeaf leaf);
+
+    static bool isInvalidColumn(const std::string& column);  // overload for columns identified by field name
+    static bool isInvalidColumn(uint64_t columnId);  // overload for columns identified by column id
+
+    template<typename T>  // T: column identifier type (field name string or uint64_t column id)
     SearchArgumentBuilder& compareOperator(PredicateLeaf::Operator op,
-                                           const std::string& column,
+                                           T column,
                                            PredicateDataType type,
                                            Literal literal);
 
+    template<typename T>  // presumably the shared body of both in() overloads; T is the column identifier type - confirm in the implementation
+    SearchArgumentBuilder& addChildForIn(T column,
+                                         PredicateDataType type,
+                                         const std::initializer_list<Literal>& literals);
+
+    template<typename T>  // presumably the shared body of both isNull() overloads; T is the column identifier type - confirm in the implementation
+    SearchArgumentBuilder& addChildForIsNull(T column,
+                                             PredicateDataType type);
+
+    template<typename T>  // presumably the shared body of both between() overloads; T is the column identifier type - confirm in the implementation
+    SearchArgumentBuilder& addChildForBetween(T column,
+                                              PredicateDataType type,
+                                              Literal lower,
+                                              Literal upper);
+
   public:
     static TreeNode pushDownNot(TreeNode root);
     static TreeNode foldMaybe(TreeNode expr);
diff --git a/c++/test/TestPredicatePushdown.cc b/c++/test/TestPredicatePushdown.cc
index bae5687..41d0b53 100644
--- a/c++/test/TestPredicatePushdown.cc
+++ b/c++/test/TestPredicatePushdown.cc
@@ -26,8 +26,7 @@ namespace orc {
 
   static const int DEFAULT_MEM_STREAM_SIZE = 10 * 1024 * 1024; // 10M
 
-  TEST(TestPredicatePushdown, testPredicatePushdown) {
-    MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+  void createMemTestFile(MemoryOutputStream& memStream) {
     MemoryPool * pool = getDefaultPool();
     auto type = std::unique_ptr<Type>(Type::buildTypeFromString(
       "struct<int1:bigint,string1:string>"));
@@ -66,26 +65,36 @@ namespace orc {
     strBatch.numElements = 3500;
     writer->add(*batch);
     writer->close();
+  }
 
-    std::unique_ptr<InputStream> inStream(new MemoryInputStream (
-      memStream.getData(), memStream.getLength()));
-    ReaderOptions readerOptions;
-    options.setMemoryPool(pool);
-    auto reader = createReader(std::move(inStream), readerOptions);
-    EXPECT_EQ(3500, reader->getNumberOfRows());
-
-    // build search argument (x >= 300000 AND x < 600000)
-    {
-      std::unique_ptr<SearchArgument> sarg = SearchArgumentFactory::newBuilder()
-        ->startAnd()
-        .startNot()
-        .lessThan("int1", PredicateDataType::LONG,
-                  Literal(static_cast<int64_t>(300000L)))
-        .end()
-        .lessThan("int1", PredicateDataType::LONG,
-                  Literal(static_cast<int64_t>(600000L)))
-        .end()
-        .build();
+  void TestRangePredicates(Reader* reader) {
+    // Build search argument (x >= 300000 AND x < 600000) for column 'int1'.
+    // Run the test twice: first using the column name, then using the column id.
+    for (int k = 0; k < 2; ++k) {
+      std::unique_ptr<SearchArgument> sarg;
+      if (k == 0) {
+        sarg = SearchArgumentFactory::newBuilder()
+          ->startAnd()
+          .startNot()
+          .lessThan("int1", PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(300000L)))
+          .end()
+          .lessThan("int1", PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(600000L)))
+          .end()
+          .build();
+      } else {
+        sarg = SearchArgumentFactory::newBuilder()
+          ->startAnd()
+          .startNot()
+          .lessThan(/*columnId=*/1, PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(300000L)))
+          .end()
+          .lessThan(/*columnId=*/1, PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(600000L)))
+          .end()
+          .build();
+      }
 
       RowReaderOptions rowReaderOpts;
       rowReaderOpts.searchArgument(std::move(sarg));
@@ -107,15 +116,28 @@ namespace orc {
       EXPECT_EQ(false, rowReader->next(*readBatch));
       EXPECT_EQ(3500, rowReader->getRowNumber());
     }
+  }
 
-    // look through the file with no rows selected: x < 0
-    {
-      std::unique_ptr<SearchArgument> sarg = SearchArgumentFactory::newBuilder()
-        ->startAnd()
-        .lessThan("int1", PredicateDataType::LONG,
-          Literal(static_cast<int64_t>(0)))
-        .end()
-        .build();
+  void TestNoRowsSelected(Reader* reader) {
+    // Look through the file with no rows selected: x < 0
+    // Run the test twice: first using the column name, then using the column id.
+    for (int i = 0; i < 2; ++i) {
+      std::unique_ptr<SearchArgument> sarg;
+      if (i == 0) {
+        sarg = SearchArgumentFactory::newBuilder()
+          ->startAnd()
+          .lessThan("int1", PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(0)))
+          .end()
+          .build();
+      } else {
+        sarg = SearchArgumentFactory::newBuilder()
+          ->startAnd()
+          .lessThan(/*columnId=*/1, PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(0)))
+          .end()
+          .build();
+      }
 
       RowReaderOptions rowReaderOpts;
       rowReaderOpts.searchArgument(std::move(sarg));
@@ -125,19 +147,36 @@ namespace orc {
       EXPECT_EQ(false, rowReader->next(*readBatch));
       EXPECT_EQ(3500, rowReader->getRowNumber());
     }
+  }
 
-    // select first 1000 and last 500 rows: x < 30000 OR x >= 1020000
-    {
-      std::unique_ptr<SearchArgument> sarg = SearchArgumentFactory::newBuilder()
-        ->startOr()
-        .lessThan("int1", PredicateDataType::LONG,
-          Literal(static_cast<int64_t>(300 * 100)))
-        .startNot()
-        .lessThan("int1", PredicateDataType::LONG,
-          Literal(static_cast<int64_t>(300 * 3400)))
-        .end()
-        .end()
-        .build();
+  void TestOrPredicates(Reader* reader) {
+    // Select first 1000 and last 500 rows: x < 30000 OR x >= 1020000
+    // Run the test twice: first using the column name, then using the column id.
+    for (int k = 0; k < 2; ++k) {
+      std::unique_ptr<SearchArgument> sarg;
+      if (k == 0) {
+        sarg = SearchArgumentFactory::newBuilder()
+          ->startOr()
+          .lessThan("int1", PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(300 * 100)))
+          .startNot()
+          .lessThan("int1", PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(300 * 3400)))
+          .end()
+          .end()
+          .build();
+      } else {
+        sarg = SearchArgumentFactory::newBuilder()
+          ->startOr()
+          .lessThan(/*columnId=*/1, PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(300 * 100)))
+          .startNot()
+          .lessThan(/*columnId=*/1, PredicateDataType::LONG,
+                    Literal(static_cast<int64_t>(300 * 3400)))
+          .end()
+          .end()
+          .build();
+      }
 
       RowReaderOptions rowReaderOpts;
       rowReaderOpts.searchArgument(std::move(sarg));
@@ -184,4 +223,19 @@ namespace orc {
     }
   }
 
+  TEST(TestPredicatePushdown, testPredicatePushdown) {  // writes a file to memory, reopens it, runs each pushdown scenario
+    MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+    MemoryPool * pool = getDefaultPool();
+    createMemTestFile(memStream);  // fills memStream with the test file
+    std::unique_ptr<InputStream> inStream(new MemoryInputStream (  // read back the bytes just written
+      memStream.getData(), memStream.getLength()));
+    ReaderOptions readerOptions;
+    readerOptions.setMemoryPool(*pool);
+    std::unique_ptr<Reader> reader = createReader(std::move(inStream), readerOptions);
+    EXPECT_EQ(3500, reader->getNumberOfRows());  // sanity check before running the predicate scenarios
+
+    TestRangePredicates(reader.get());  // all three helpers share this one reader
+    TestNoRowsSelected(reader.get());  // each helper runs once by column name and once by column id
+    TestOrPredicates(reader.get());
+  }
 }  // namespace orc