You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/04/27 05:33:41 UTC

[impala] branch master updated: IMPALA-11141: Use exact data types in IN-list filter

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 6380a3187 IMPALA-11141: Use exact data types in IN-list filter
6380a3187 is described below

commit 6380a3187c2d0efeadfa7c8a4ff5746b0e16e6ab
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Sat Apr 9 21:32:45 2022 +0800

    IMPALA-11141: Use exact data types in IN-list filter
    
    Currently, we use a std::unordered_set<int64_t> for all numeric types
    (including DATE type). It's a waste of space for small data types like
    tinyint, smallint, int, etc. This patch extends the base InListFilter
    class with native implementations for different data types.
    
    For string type in-list filters, this patch uses impala::StringValue
    instead of std::string. This simplifies the Insert() method, which
    improves the codegen time. To use impala::StringValue, this patch
    switches the set implementation to boost::unordered_set, the same as what we
    use in InPredicate.
    
    Another improvement of using impala::StringValue is that we can easily
    maintain the strings in MemPool. When inserting a new batch of values,
    the new values are inserted into a temp set. The string pointers still
    reference the original tuple values. At the end of processing each
    batch, MaterializeValues() is invoked to copy the strings into the
    filter's own mem pool. This is more memory-friendly than the original
    approach since we can allocate the string batch at once.
    
    Tests:
     - Add unit tests for different types of in-list filters
    
    Change-Id: Id434a542b2ced64efa3bfc974cb565b94a4193e9
    Reviewed-on: http://gerrit.cloudera.org:8080/18433
    Reviewed-by: Qifan Chen <qc...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/codegen/gen_ir_descriptions.py              |   9 +-
 be/src/exec/filter-context.cc                      |  53 ++++-
 be/src/exec/hdfs-orc-scanner.cc                    |  25 +-
 be/src/runtime/runtime-filter-bank.cc              |   4 +-
 be/src/util/CMakeLists.txt                         |   2 +
 be/src/util/in-list-filter-ir.cc                   | 125 ++++++----
 be/src/util/in-list-filter-test.cc                 | 203 ++++++++++++++++
 be/src/util/in-list-filter.cc                      | 264 +++++++++++----------
 be/src/util/in-list-filter.h                       | 154 ++++++++++--
 .../impala/planner/RuntimeFilterGenerator.java     |   7 +-
 .../PlannerTest/runtime-filter-query-options.test  |   2 +-
 11 files changed, 614 insertions(+), 234 deletions(-)

diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index a482a7264..344491b5c 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -217,7 +217,14 @@ ir_functions = [
   ["DECIMAL_MIN_MAX_FILTER_INSERT4", "_ZN6impala19DecimalMinMaxFilter7Insert4EPKv"],
   ["DECIMAL_MIN_MAX_FILTER_INSERT8", "_ZN6impala19DecimalMinMaxFilter7Insert8EPKv"],
   ["DECIMAL_MIN_MAX_FILTER_INSERT16", "_ZN6impala19DecimalMinMaxFilter8Insert16EPKv"],
-  ["IN_LIST_FILTER_INSERT", "_ZN6impala12InListFilter6InsertEPKv"],
+  ["TINYINT_IN_LIST_FILTER_INSERT",  "_ZN6impala16InListFilterImplIaLNS_13PrimitiveTypeE3EE6InsertEPKv"],
+  ["SMALLINT_IN_LIST_FILTER_INSERT", "_ZN6impala16InListFilterImplIsLNS_13PrimitiveTypeE4EE6InsertEPKv"],
+  ["INT_IN_LIST_FILTER_INSERT",      "_ZN6impala16InListFilterImplIiLNS_13PrimitiveTypeE5EE6InsertEPKv"],
+  ["BIGINT_IN_LIST_FILTER_INSERT",   "_ZN6impala16InListFilterImplIlLNS_13PrimitiveTypeE6EE6InsertEPKv"],
+  ["DATE_IN_LIST_FILTER_INSERT",     "_ZN6impala16InListFilterImplIiLNS_13PrimitiveTypeE11EE6InsertEPKv"],
+  ["STRING_IN_LIST_FILTER_INSERT",  "_ZN6impala16InListFilterImplINS_11StringValueELNS_13PrimitiveTypeE10EE6InsertEPKv"],
+  ["CHAR_IN_LIST_FILTER_INSERT",    "_ZN6impala16InListFilterImplINS_11StringValueELNS_13PrimitiveTypeE15EE6InsertEPKv"],
+  ["VARCHAR_IN_LIST_FILTER_INSERT", "_ZN6impala16InListFilterImplINS_11StringValueELNS_13PrimitiveTypeE16EE6InsertEPKv"],
   ["KRPC_DSS_GET_PART_EXPR_EVAL",
   "_ZN6impala20KrpcDataStreamSender25GetPartitionExprEvaluatorEi"],
   ["KRPC_DSS_HASH_AND_ADD_ROWS",
diff --git a/be/src/exec/filter-context.cc b/be/src/exec/filter-context.cc
index ae72596b2..886efc4fe 100644
--- a/be/src/exec/filter-context.cc
+++ b/be/src/exec/filter-context.cc
@@ -127,6 +127,8 @@ void FilterContext::InsertPerCompareOp(TupleRow* row) const noexcept {
 void FilterContext::MaterializeValues() const {
   if (filter->is_min_max_filter() && local_min_max_filter != nullptr) {
     local_min_max_filter->MaterializeValues();
+  } else if (filter->is_in_list_filter() && local_in_list_filter != nullptr) {
+    local_in_list_filter->MaterializeValues();
   }
 }
 
@@ -388,6 +390,8 @@ Status FilterContext::CodegenInsert(LlvmCodeGen* codegen, ScalarExpr* filter_exp
   llvm::Value* row_arg = args[1];
 
   llvm::Value* local_filter_arg;
+  // The function for inserting into the in-list filter.
+  llvm::Function* insert_in_list_filter_fn = nullptr;
   if (filter_desc.type == TRuntimeFilterType::BLOOM) {
     // Load 'local_bloom_filter' from 'this_arg' FilterContext object.
     llvm::Value* local_bloom_filter_ptr =
@@ -410,6 +414,51 @@ Status FilterContext::CodegenInsert(LlvmCodeGen* codegen, ScalarExpr* filter_exp
     // Load 'local_in_list_filter' from 'this_arg' FilterContext object.
     llvm::Value* local_in_list_filter_ptr =
         builder.CreateStructGEP(nullptr, this_arg, 5, "local_in_list_filter_ptr");
+    switch (filter_expr->type().type) {
+      case TYPE_TINYINT:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::TINYINT_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_SMALLINT:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::SMALLINT_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_INT:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::INT_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_BIGINT:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::BIGINT_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_DATE:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::DATE_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_STRING:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::STRING_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_CHAR:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::CHAR_IN_LIST_FILTER_INSERT, false);
+        break;
+      case TYPE_VARCHAR:
+        insert_in_list_filter_fn = codegen->GetFunction(
+            IRFunction::VARCHAR_IN_LIST_FILTER_INSERT, false);
+        break;
+      default:
+        DCHECK(false);
+        break;
+    }
+    // Get type of the InListFilterImpl class from the first arg of the Insert() method.
+    // We can't hardcode the class name since it's a template class. The class name will
+    // be something like "class.impala::InListFilterImpl.1408". The last number is a
+    // unique id appended by LLVM at runtime.
+    llvm::Type* filter_impl_type = insert_in_list_filter_fn->arg_begin()->getType();
+    llvm::PointerType* in_list_filter_type = codegen->GetPtrType(filter_impl_type);
+    local_in_list_filter_ptr = builder.CreatePointerCast(
+        local_in_list_filter_ptr, in_list_filter_type, "cast_in_list_filter_ptr");
     local_filter_arg =
         builder.CreateLoad(local_in_list_filter_ptr, "local_in_list_filter_arg");
   }
@@ -534,11 +583,7 @@ Status FilterContext::CodegenInsert(LlvmCodeGen* codegen, ScalarExpr* filter_exp
     builder.CreateCall(min_max_insert_fn, insert_filter_args);
   } else {
     DCHECK(filter_desc.type == TRuntimeFilterType::IN_LIST);
-    // The function for inserting into the in-list filter.
-    llvm::Function* insert_in_list_filter_fn =
-        codegen->GetFunction(IRFunction::IN_LIST_FILTER_INSERT, false);
     DCHECK(insert_in_list_filter_fn != nullptr);
-
     llvm::Value* insert_filter_args[] = {local_filter_arg, val_ptr_phi};
     builder.CreateCall(insert_in_list_filter_fn, insert_filter_args);
   }
diff --git a/be/src/exec/hdfs-orc-scanner.cc b/be/src/exec/hdfs-orc-scanner.cc
index da55f7782..fd702550e 100644
--- a/be/src/exec/hdfs-orc-scanner.cc
+++ b/be/src/exec/hdfs-orc-scanner.cc
@@ -1423,31 +1423,8 @@ bool HdfsOrcScanner::UpdateSearchArgumentWithFilters(orc::SearchArgumentBuilder*
 
     VLOG_FILE << "Generating ORC IN-list for filter " << filter->id();
     std::vector<orc::Literal> in_list;
+    in_list_filter->ToOrcLiteralList(&in_list);
     const ColumnType& col_type = filter->type();
-    switch(col_type.type) {
-      case TYPE_TINYINT:
-      case TYPE_SMALLINT:
-      case TYPE_INT:
-      case TYPE_BIGINT: {
-        for (int64_t v : in_list_filter->values_) {
-          in_list.emplace_back(v);
-        }
-        break;
-      }
-      case TYPE_DATE: {
-        for (int64_t v : in_list_filter->values_) {
-          in_list.emplace_back(orc::PredicateDataType::DATE, v);
-        }
-        break;
-      }
-      case TYPE_STRING: {
-        for (const string& str : in_list_filter->str_values_) {
-          in_list.emplace_back(str.c_str(), str.length());
-        }
-        break;
-      }
-      default: break;
-    }
     if (in_list_filter->ContainsNull()) {
       // Add a null literal with type.
       in_list.emplace_back(GetOrcPredicateDataType(col_type));
diff --git a/be/src/runtime/runtime-filter-bank.cc b/be/src/runtime/runtime-filter-bank.cc
index 86b78c4f9..7e01c0ee5 100644
--- a/be/src/runtime/runtime-filter-bank.cc
+++ b/be/src/runtime/runtime-filter-bank.cc
@@ -380,7 +380,7 @@ void RuntimeFilterBank::PublishGlobalFilter(
     DCHECK(query_state_->query_options().__isset.runtime_in_list_filter_entry_limit);
     int entry_limit = query_state_->query_options().runtime_in_list_filter_entry_limit;
     in_list_filter = InListFilter::Create(params.in_list_filter(),
-        fs->consumed_filter->type(), entry_limit, &obj_pool_);
+        fs->consumed_filter->type(), entry_limit, &obj_pool_, filter_mem_tracker_);
     fs->in_list_filters.push_back(in_list_filter);
     total_in_list_filter_items_->Add(params.in_list_filter().value_size());
     details = Substitute(" with $0 items", params.in_list_filter().value_size());
@@ -442,7 +442,7 @@ InListFilter* RuntimeFilterBank::AllocateScratchInListFilter(
   DCHECK(query_state_->query_options().__isset.runtime_in_list_filter_entry_limit);
   int32_t entry_limit = query_state_->query_options().runtime_in_list_filter_entry_limit;
   InListFilter* in_list_filter =
-      InListFilter::Create(type, entry_limit, &obj_pool_);
+      InListFilter::Create(type, entry_limit, &obj_pool_, filter_mem_tracker_);
   fs->in_list_filters.push_back(in_list_filter);
   return in_list_filter;
 }
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index f0d844abe..69622a5b6 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -158,6 +158,7 @@ add_library(UtilTests STATIC
   fixed-size-hash-table-test.cc
   hdfs-util-test.cc
   hdr-histogram-test.cc
+  in-list-filter-test.cc
   jwt-util-test.cc
   logging-support-test.cc
   lru-multi-cache-test.cc
@@ -230,6 +231,7 @@ ADD_UNIFIED_BE_LSAN_TEST(hdr-histogram-test HdrHistogramTest.*)
 # internal-queue-test has a non-standard main(), so it needs a small amount of thought
 # to use a unified executable
 ADD_BE_LSAN_TEST(internal-queue-test)
+ADD_UNIFIED_BE_LSAN_TEST(in-list-filter-test "InListFilterTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(jwt-util-test "JwtUtilTest.*")
 ADD_UNIFIED_BE_LSAN_TEST(lru-multi-cache-test "LruMultiCache.*")
 ADD_UNIFIED_BE_LSAN_TEST(logging-support-test "LoggingSupport.*")
diff --git a/be/src/util/in-list-filter-ir.cc b/be/src/util/in-list-filter-ir.cc
index d57395445..9e532b418 100644
--- a/be/src/util/in-list-filter-ir.cc
+++ b/be/src/util/in-list-filter-ir.cc
@@ -20,56 +20,81 @@
 
 namespace impala {
 
-void InListFilter::Insert(const void* val) {
-  if (always_true_) return;
-  if (UNLIKELY(val == nullptr)) {
-    contains_null_ = true;
-    return;
-  }
-  if (UNLIKELY(values_.size() >= entry_limit_ || str_values_.size() >= entry_limit_)) {
-    always_true_ = true;
-    values_.clear();
-    str_values_.clear();
-    return;
-  }
-  switch (type_) {
-    case TYPE_TINYINT:
-      values_.insert(*reinterpret_cast<const int8_t*>(val));
-      break;
-    case TYPE_SMALLINT:
-      values_.insert(*reinterpret_cast<const int16_t*>(val));
-      break;
-    case TYPE_INT:
-      values_.insert(*reinterpret_cast<const int32_t*>(val));
-      break;
-    case TYPE_BIGINT:
-      values_.insert(*reinterpret_cast<const int64_t*>(val));
-      break;
-    case TYPE_DATE:
-      values_.insert(reinterpret_cast<const DateValue*>(val)->Value());
-      break;
-    case TYPE_STRING:
-    case TYPE_VARCHAR: {
-      const StringValue* s = reinterpret_cast<const StringValue*>(val);
-      if (UNLIKELY(s->ptr == nullptr)) {
-        contains_null_ = true;
-      } else {
-        str_total_size_ += s->len;
-        if (str_total_size_ >= STRING_SET_MAX_TOTAL_LENGTH) {
-          always_true_ = true;
-          str_values_.clear();
-          return;
-        }
-        str_values_.insert(string(s->ptr, s->len));
-      }
-      break;
-    }
-    case TYPE_CHAR:
-      str_values_.insert(string(reinterpret_cast<const char*>(val), type_len_));
-      break;
-    default:
-      DCHECK(false) << "Not supported IN-list filter type: " << TypeToString(type_);
-      break;
+template<>
+int32_t InListFilterImpl<int32_t, TYPE_DATE>::GetValue(const void* val) {
+  return reinterpret_cast<const DateValue*>(val)->Value();
+}
+
+#define NUMERIC_IN_LIST_FILTER_FUNCTIONS(TYPE, SLOT_TYPE)                             \
+  template<>                                                                          \
+  void InListFilterImpl<TYPE, SLOT_TYPE>::Insert(const void* val) {                   \
+    if (UNLIKELY(always_true_)) return;                                               \
+    if (UNLIKELY(val == nullptr)) {                                                   \
+      contains_null_ = true;                                                          \
+      return;                                                                         \
+    }                                                                                 \
+    const auto& res = values_.insert(GetValue(val));                                  \
+    if (res.second) {                                                                 \
+      ++total_entries_;                                                               \
+      if (UNLIKELY(total_entries_ > entry_limit_)) {                                  \
+        Reset();                                                                      \
+      }                                                                               \
+    }                                                                                 \
+  }                                                                                   \
+                                                                                      \
+  template<>                                                                          \
+  bool InListFilterImpl<TYPE, SLOT_TYPE>::Find(const void* val,                       \
+      const ColumnType& col_type) const noexcept {                                    \
+    if (always_true_) return true;                                                    \
+    if (val == nullptr) return contains_null_;                                        \
+    return values_.find(GetValue(val)) != values_.end();                              \
   }
+
+NUMERIC_IN_LIST_FILTER_FUNCTIONS(int8_t, TYPE_TINYINT)
+NUMERIC_IN_LIST_FILTER_FUNCTIONS(int16_t, TYPE_SMALLINT)
+NUMERIC_IN_LIST_FILTER_FUNCTIONS(int32_t, TYPE_INT)
+NUMERIC_IN_LIST_FILTER_FUNCTIONS(int64_t, TYPE_BIGINT)
+NUMERIC_IN_LIST_FILTER_FUNCTIONS(int32_t, TYPE_DATE)
+
+template<>
+StringValue InListFilterImpl<StringValue, TYPE_CHAR>::GetValue(const void* val,
+    int char_type_len) {
+  return {const_cast<char*>(reinterpret_cast<const char*>(val)), char_type_len};
 }
+
+#define STRING_IN_LIST_FILTER_FUNCTIONS(SLOT_TYPE)                                      \
+  template<>                                                                            \
+  void InListFilterImpl<StringValue, SLOT_TYPE>::Insert(const void* val) {              \
+    if (always_true_) return;                                                           \
+    if (UNLIKELY(val == nullptr)) {                                                     \
+      contains_null_ = true;                                                            \
+      return;                                                                           \
+    }                                                                                   \
+    StringValue s = GetValue(val, type_len_);                                           \
+    if (!values_.find(s)) {                                                             \
+      bool res = newly_inserted_values_.insert(s);                                      \
+      if (res) {                                                                        \
+        ++total_entries_;                                                               \
+        uint32_t str_total_len = values_.total_len + newly_inserted_values_.total_len;  \
+        if (UNLIKELY(total_entries_ > entry_limit_                                      \
+            || str_total_len >= STRING_SET_MAX_TOTAL_LENGTH)) {                         \
+          Reset();                                                                      \
+        }                                                                               \
+      }                                                                                 \
+    }                                                                                   \
+  }                                                                                     \
+                                                                                        \
+  template<>                                                                            \
+  bool InListFilterImpl<StringValue, SLOT_TYPE>::Find(const void* val,                  \
+      const ColumnType& col_type) const noexcept {                                      \
+    if (always_true_) return true;                                                      \
+    if (val == nullptr) return contains_null_;                                          \
+    StringValue s = GetValue(val, type_len_);                                           \
+    return values_.find(s);                                                             \
+  }
+
+STRING_IN_LIST_FILTER_FUNCTIONS(TYPE_STRING)
+STRING_IN_LIST_FILTER_FUNCTIONS(TYPE_VARCHAR)
+STRING_IN_LIST_FILTER_FUNCTIONS(TYPE_CHAR)
+
 } // namespace impala
diff --git a/be/src/util/in-list-filter-test.cc b/be/src/util/in-list-filter-test.cc
new file mode 100644
index 000000000..9db92486a
--- /dev/null
+++ b/be/src/util/in-list-filter-test.cc
@@ -0,0 +1,203 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "testutil/gtest-util.h"
+#include "util/in-list-filter.h"
+
+#include "runtime/date-value.h"
+#include "runtime/test-env.h"
+
+using namespace impala;
+
+template<typename T, PrimitiveType SLOT_TYPE>
+void TestNumericInListFilter() {
+  MemTracker mem_tracker;
+  ObjectPool obj_pool;
+  ColumnType col_type(SLOT_TYPE);
+  InListFilter* filter = InListFilter::Create(col_type, 20, &obj_pool, &mem_tracker);
+  EXPECT_TRUE(filter->AlwaysFalse());
+  EXPECT_FALSE(filter->AlwaysTrue());
+
+  for (T v = -10; v < 10; ++v) {
+    filter->Insert(&v);
+  }
+  // Insert duplicated values again
+  for (T v = 9; v >= 0; --v) {
+    filter->Insert(&v);
+  }
+  EXPECT_EQ(20, filter->NumItems());
+  EXPECT_FALSE(filter->ContainsNull());
+  filter->Insert(nullptr);
+  EXPECT_TRUE(filter->ContainsNull());
+  EXPECT_EQ(21, filter->NumItems());
+
+  for (T v = -10; v < 10; ++v) {
+    EXPECT_TRUE(filter->Find(&v, col_type));
+  }
+  T i = -11;
+  EXPECT_FALSE(filter->Find(&i, col_type));
+  i = 10;
+  EXPECT_FALSE(filter->Find(&i, col_type));
+
+  EXPECT_FALSE(filter->AlwaysFalse());
+  EXPECT_FALSE(filter->AlwaysTrue());
+
+  // Test falling back to an always_true filter when #items exceeds the limit
+  filter->Insert(&i);
+  EXPECT_FALSE(filter->AlwaysFalse());
+  EXPECT_TRUE(filter->AlwaysTrue());
+  EXPECT_EQ(0, filter->NumItems());
+}
+
+TEST(InListFilterTest, TestTinyint) {
+  TestNumericInListFilter<int8_t, TYPE_TINYINT>();
+}
+
+TEST(InListFilterTest, TestSmallint) {
+  TestNumericInListFilter<int16_t, TYPE_SMALLINT>();
+}
+
+TEST(InListFilterTest, TestInt) {
+  TestNumericInListFilter<int32_t, TYPE_INT>();
+}
+
+TEST(InListFilterTest, TestBigint) {
+  TestNumericInListFilter<int64_t, TYPE_BIGINT>();
+}
+
+TEST(InListFilterTest, TestDate) {
+  MemTracker mem_tracker;
+  ObjectPool obj_pool;
+  ColumnType col_type(TYPE_DATE);
+  InListFilter* filter = InListFilter::Create(col_type, 5, &obj_pool, &mem_tracker);
+  EXPECT_TRUE(filter->AlwaysFalse());
+  EXPECT_FALSE(filter->AlwaysTrue());
+
+  vector<DateValue> values;
+  for (int i = 1; i <= 5; ++i) {
+    values.emplace_back(i * 10000);
+  }
+
+  for (const auto& v : values) {
+    filter->Insert(&v);
+  }
+  // Insert duplicated values again
+  for (const auto& v : values) {
+    filter->Insert(&v);
+  }
+  EXPECT_EQ(5, filter->NumItems());
+  EXPECT_FALSE(filter->ContainsNull());
+  filter->Insert(nullptr);
+  EXPECT_TRUE(filter->ContainsNull());
+  EXPECT_EQ(6, filter->NumItems());
+
+  for (const auto& v : values) {
+    EXPECT_TRUE(filter->Find(&v, col_type));
+  }
+  DateValue d(60000);
+  EXPECT_FALSE(filter->Find(&d, col_type));
+}
+
+void TestStringInListFilter(const ColumnType& col_type) {
+  MemTracker mem_tracker;
+  ObjectPool obj_pool;
+  InListFilter* filter = InListFilter::Create(col_type, 5, &obj_pool, &mem_tracker);
+  EXPECT_TRUE(filter->AlwaysFalse());
+  EXPECT_FALSE(filter->AlwaysTrue());
+
+  vector<StringValue> ss1 = { StringValue("aaa"), StringValue("aa") };
+  vector<StringValue> ss2 = { StringValue("a"), StringValue("b"), StringValue("c") };
+
+  // Insert the first batch
+  for (const StringValue& s : ss1) {
+    filter->Insert(&s);
+  }
+  filter->MaterializeValues();
+  // Insert the second batch with some duplicated values
+  for (const StringValue& s : ss2) {
+    filter->Insert(&s);
+  }
+  for (const StringValue& s : ss1) {
+    filter->Insert(&s);
+  }
+  filter->MaterializeValues();
+
+  EXPECT_EQ(5, filter->NumItems());
+  EXPECT_FALSE(filter->ContainsNull());
+  filter->Insert(nullptr);
+  EXPECT_TRUE(filter->ContainsNull());
+  EXPECT_EQ(6, filter->NumItems());
+
+  // Merge ss2 to ss1
+  ss1.insert(ss1.end(), ss2.begin(), ss2.end());
+  for (const StringValue& s : ss1) {
+    EXPECT_TRUE(filter->Find(&s, col_type));
+  }
+  StringValue d("d");
+  EXPECT_FALSE(filter->Find(&d, col_type));
+  filter->Close();
+}
+
+TEST(InListFilterTest, TestString) {
+  ColumnType col_type(TYPE_STRING);
+  TestStringInListFilter(col_type);
+}
+
+TEST(InListFilterTest, TestVarchar) {
+  ColumnType col_type = ColumnType::CreateVarcharType(10);
+  TestStringInListFilter(col_type);
+}
+
+TEST(InListFilterTest, TestChar) {
+  MemTracker mem_tracker;
+  ObjectPool obj_pool;
+  ColumnType col_type = ColumnType::CreateCharType(2);
+  InListFilter* filter = InListFilter::Create(col_type, 5, &obj_pool, &mem_tracker);
+  EXPECT_TRUE(filter->AlwaysFalse());
+  EXPECT_FALSE(filter->AlwaysTrue());
+
+  char str_buffer[] = "aabbccddeeff";
+  const char* ptr = str_buffer;
+  // Insert 3 values first
+  for (int i = 0; i < 3; ++i) {
+    filter->Insert(ptr);
+    ptr += 2;
+  }
+  filter->MaterializeValues();
+  // Insert the 5 all values
+  ptr = str_buffer;
+  for (int i = 0; i < 5; ++i) {
+    filter->Insert(ptr);
+    ptr += 2;
+  }
+  filter->MaterializeValues();
+
+  EXPECT_EQ(5, filter->NumItems());
+  EXPECT_FALSE(filter->ContainsNull());
+  filter->Insert(nullptr);
+  EXPECT_TRUE(filter->ContainsNull());
+  EXPECT_EQ(6, filter->NumItems());
+
+  ptr = str_buffer;
+  for (int i = 0; i < 5; ++i) {
+    EXPECT_TRUE(filter->Find(ptr, col_type));
+    ptr += 2;
+  }
+  ptr = "gg";
+  EXPECT_FALSE(filter->Find(ptr, col_type));
+  filter->Close();
+}
\ No newline at end of file
diff --git a/be/src/util/in-list-filter.cc b/be/src/util/in-list-filter.cc
index 7dfc0eda3..8749deda2 100644
--- a/be/src/util/in-list-filter.cc
+++ b/be/src/util/in-list-filter.cc
@@ -18,96 +18,64 @@
 #include "util/in-list-filter.h"
 
 #include "common/object-pool.h"
+#include "runtime/string-value.inline.h"
 
 namespace impala {
 
-bool InListFilter::AlwaysFalse() {
-  return !always_true_ && !contains_null_ && values_.empty() && str_values_.empty();
+InListFilter::InListFilter(uint32_t entry_limit, bool contains_null):
+  entry_limit_(entry_limit), always_true_(false), contains_null_(contains_null) {
 }
 
 bool InListFilter::AlwaysFalse(const InListFilterPB& filter) {
   return !filter.always_true() && !filter.contains_null() && filter.value_size() == 0;
 }
 
-bool InListFilter::Find(void* val, const ColumnType& col_type) const noexcept {
-  if (always_true_) return true;
-  if (val == nullptr) return contains_null_;
-  DCHECK_EQ(type_, col_type.type);
-  int64_t v;
-  const StringValue* s;
-  switch (col_type.type) {
+InListFilter* InListFilter::Create(ColumnType type, uint32_t entry_limit,
+    ObjectPool* pool, MemTracker* mem_tracker, bool contains_null) {
+  InListFilter* res;
+  switch (type.type) {
     case TYPE_TINYINT:
-      v = *reinterpret_cast<const int8_t*>(val);
+      res = new InListFilterImpl<int8_t, TYPE_TINYINT>(entry_limit, contains_null);
       break;
     case TYPE_SMALLINT:
-      v = *reinterpret_cast<const int16_t*>(val);
+      res = new InListFilterImpl<int16_t, TYPE_SMALLINT>(entry_limit, contains_null);
       break;
     case TYPE_INT:
-      v = *reinterpret_cast<const int32_t*>(val);
+      res = new InListFilterImpl<int32_t, TYPE_INT>(entry_limit, contains_null);
       break;
     case TYPE_BIGINT:
-      v = *reinterpret_cast<const int64_t*>(val);
+      res = new InListFilterImpl<int64_t, TYPE_BIGINT>(entry_limit, contains_null);
       break;
     case TYPE_DATE:
-      v = reinterpret_cast<const DateValue*>(val)->Value();
+      // We use int32_t for DATE type as well
+      res = new InListFilterImpl<int32_t, TYPE_DATE>(entry_limit, contains_null);
       break;
     case TYPE_STRING:
+      res = new InListFilterImpl<StringValue, TYPE_STRING>(type, entry_limit,
+          mem_tracker, contains_null);
+      break;
     case TYPE_VARCHAR:
-      s = reinterpret_cast<const StringValue*>(val);
-      return str_values_.find(string(s->ptr, s->len)) != str_values_.end();
+      res = new InListFilterImpl<StringValue, TYPE_VARCHAR>(type, entry_limit,
+          mem_tracker, contains_null);
+      break;
     case TYPE_CHAR:
-      return str_values_.find(string(reinterpret_cast<const char*>(val), col_type.len))
-          != str_values_.end();
+      res = new InListFilterImpl<StringValue, TYPE_CHAR>(type, entry_limit,
+          mem_tracker, contains_null);
+      break;
     default:
-      DCHECK(false) << "Not support IN-list filter type: " << TypeToString(type_);
-      return false;
+      DCHECK(false) << "Not support IN-list filter type: " << TypeToString(type.type);
+      return nullptr;
   }
-  return values_.find(v) != values_.end();
-}
-
-InListFilter::InListFilter(ColumnType type, uint32_t entry_limit, bool contains_null):
-  always_true_(false), contains_null_(contains_null), type_(type.type),
-  entry_limit_(entry_limit) {
-  if (type.type == TYPE_CHAR) type_len_ = type.len;
-}
-
-InListFilter* InListFilter::Create(ColumnType type, uint32_t entry_limit,
-    ObjectPool* pool) {
-  return pool->Add(new InListFilter(type, entry_limit));
+  return pool->Add(res);
 }
 
 InListFilter* InListFilter::Create(const InListFilterPB& protobuf, ColumnType type,
-    uint32_t entry_limit, ObjectPool* pool) {
-  InListFilter* filter = pool->Add(
-      new InListFilter(type, entry_limit, protobuf.contains_null()));
+    uint32_t entry_limit, ObjectPool* pool, MemTracker* mem_tracker) {
+  InListFilter* filter = InListFilter::Create(type, entry_limit, pool, mem_tracker,
+      protobuf.contains_null());
   filter->always_true_ = protobuf.always_true();
-  for (const ColumnValuePB& v : protobuf.value()) {
-    switch (type.type) {
-      case TYPE_TINYINT:
-      case TYPE_SMALLINT:
-      case TYPE_INT:
-      case TYPE_BIGINT:
-      case TYPE_DATE:
-        DCHECK(v.has_long_val());
-        filter->values_.insert(v.long_val());
-        break;
-      case TYPE_STRING:
-      case TYPE_CHAR:
-      case TYPE_VARCHAR:
-        DCHECK(v.has_string_val());
-        // TODO(IMPALA-11143): use mem_tracker
-        filter->str_values_.insert(v.string_val());
-        break;
-      default:
-        DCHECK(false) << "Not support IN-list filter type: " << TypeToString(type.type);
-        return nullptr;
-    }
-  }
-  if (type.IsStringType()) {
-    DCHECK(filter->values_.empty());
-  } else {
-    DCHECK(filter->str_values_.empty());
-  }
+  filter->InsertBatch(protobuf.value());
+  filter->MaterializeValues();
   return filter;
 }
 
@@ -120,59 +88,10 @@ void InListFilter::ToProtobuf(const InListFilter* filter, InListFilterPB* protob
   filter->ToProtobuf(protobuf);
 }
 
-void InListFilter::ToProtobuf(InListFilterPB* protobuf) const {
-  protobuf->set_always_true(always_true_);
-  if (always_true_) return;
-  protobuf->set_contains_null(contains_null_);
-  if (type_ == TYPE_STRING || type_ == TYPE_VARCHAR || type_ == TYPE_CHAR) {
-    for (const string& s : str_values_) {
-      ColumnValuePB* proto = protobuf->add_value();
-      proto->set_string_val(s);
-    }
-  } else {
-    for (int64_t v : values_) {
-      ColumnValuePB* proto = protobuf->add_value();
-      proto->set_long_val(v);
-    }
-  }
-}
-
-int InListFilter::NumItems() const noexcept {
-  int res = contains_null_ ? 1 : 0;
-  if (type_ == TYPE_STRING || type_ == TYPE_VARCHAR || type_ == TYPE_CHAR) {
-    return res + str_values_.size();
-  }
-  return res + values_.size();
-}
-
 string InListFilter::DebugString() const noexcept {
   std::stringstream ss;
-  bool first_value = true;
-  ss << "IN-list filter: [";
-  if (type_ == TYPE_STRING) {
-    for (const string &s : str_values_) {
-      if (first_value) {
-        first_value = false;
-      } else {
-        ss << ',';
-      }
-      ss << "\"" << s << "\"";
-    }
-  } else {
-    for (int64_t v : values_) {
-      if (first_value) {
-        first_value = false;
-      } else {
-        ss << ',';
-      }
-      ss << v;
-    }
-  }
-  if (contains_null_) {
-    if (!first_value) ss << ',';
-    ss << "NULL";
-  }
-  ss << ']';
+  ss << "IN-list filter of " << total_entries_ << " items";
+  if (contains_null_) ss << " with NULL";
   return ss.str();
 }
 
@@ -192,22 +111,115 @@ string InListFilter::DebugStringOfList(const InListFilterPB& filter) {
     } else {
       ss << ',';
     }
-    if (v.has_byte_val()) {
-      ss << v.byte_val();
-    } else if (v.has_short_val()) {
-      ss << v.short_val();
-    } else if (v.has_int_val()) {
-      ss << v.int_val();
-    } else if (v.has_long_val()) {
-      ss << v.long_val();
-    } else if (v.has_date_val()) {
-      ss << v.date_val();
-    } else if (v.has_string_val()) {
-      ss << "\"" << v.string_val() << "\"";
-    }
+    ss << v.ShortDebugString();
   }
   ss << ']';
   return ss.str();
 }
 
+template<PrimitiveType SLOT_TYPE>
+void InListFilterImpl<StringValue, SLOT_TYPE>::MaterializeValues() {
+  if (newly_inserted_values_.total_len == 0) {
+    if (!newly_inserted_values_.values.empty()) {
+      // Newly inserted values are all empty strings. Don't need to allocate memory.
+      values_.values.insert(
+          newly_inserted_values_.values.begin(), newly_inserted_values_.values.end());
+    }
+    return;  // No string bytes are pending, so there is nothing to copy.
+  }
+  uint8_t* buffer = mem_pool_.Allocate(newly_inserted_values_.total_len);
+  if (buffer == nullptr) {
+    VLOG_QUERY << "Not enough memory in materializing string IN-list filters. "
+        << "Fallback to always true. New string batch size: "
+        << newly_inserted_values_.total_len << "\n" << mem_pool_.DebugString();
+    always_true_ = true;  // Degrade to a filter that accepts everything.
+    values_.clear();
+    newly_inserted_values_.clear();
+    total_entries_ = 0;
+    return;
+  }
+  // Transfer values to the final set. Don't need to update total_entries_ since it's
+  // already done in Insert().
+  for (const StringValue& s : newly_inserted_values_.values) {
+    Ubsan::MemCpy(buffer, s.ptr, s.len);
+    values_.insert(StringValue(reinterpret_cast<char*>(buffer), s.len));
+    buffer += s.len;  // The next string is laid out right after this one.
+  }
+  newly_inserted_values_.clear();  // Everything pending has been moved to values_.
+}
+
+#define IN_LIST_FILTER_INSERT_BATCH(TYPE, SLOT_TYPE, PB_VAL_METHOD)                      \
+  template<>                                                                             \
+  void InListFilterImpl<TYPE, SLOT_TYPE>::InsertBatch(const ColumnValueBatchPB& batch) { \
+    for (const ColumnValuePB& v : batch) {                                               \
+      DCHECK(v.has_##PB_VAL_METHOD());                                                   \
+      values_.insert(v.PB_VAL_METHOD());                                                 \
+    } /* NOTE(review): total_entries_ is not updated by this loop - confirm callers. */  \
+  }  // NOTE(review): strings land in values_, which MaterializeValues() never copies.
+
+IN_LIST_FILTER_INSERT_BATCH(int8_t, TYPE_TINYINT, byte_val)
+IN_LIST_FILTER_INSERT_BATCH(int16_t, TYPE_SMALLINT, short_val)
+IN_LIST_FILTER_INSERT_BATCH(int32_t, TYPE_INT, int_val)
+IN_LIST_FILTER_INSERT_BATCH(int64_t, TYPE_BIGINT, long_val)
+IN_LIST_FILTER_INSERT_BATCH(int32_t, TYPE_DATE, int_val)
+IN_LIST_FILTER_INSERT_BATCH(StringValue, TYPE_STRING, string_val)
+IN_LIST_FILTER_INSERT_BATCH(StringValue, TYPE_VARCHAR, string_val)
+IN_LIST_FILTER_INSERT_BATCH(StringValue, TYPE_CHAR, string_val)
+
+#define NUMERIC_IN_LIST_FILTER_TO_PROTOBUF(TYPE, SLOT_TYPE, PB_VAL_METHOD)             \
+  template<>                                                                           \
+  void InListFilterImpl<TYPE, SLOT_TYPE>::ToProtobuf(InListFilterPB* protobuf) const { \
+    protobuf->set_always_true(always_true_);                                           \
+    if (always_true_) return; /* An always-true filter carries no values. */           \
+    protobuf->set_contains_null(contains_null_);                                       \
+    for (TYPE v : values_) {                                                           \
+      ColumnValuePB* proto = protobuf->add_value();                                    \
+      proto->set_##PB_VAL_METHOD(v);                                                   \
+    }                                                                                  \
+  }
+
+NUMERIC_IN_LIST_FILTER_TO_PROTOBUF(int8_t, TYPE_TINYINT, byte_val)
+NUMERIC_IN_LIST_FILTER_TO_PROTOBUF(int16_t, TYPE_SMALLINT, short_val)
+NUMERIC_IN_LIST_FILTER_TO_PROTOBUF(int32_t, TYPE_INT, int_val)
+NUMERIC_IN_LIST_FILTER_TO_PROTOBUF(int64_t, TYPE_BIGINT, long_val)
+NUMERIC_IN_LIST_FILTER_TO_PROTOBUF(int32_t, TYPE_DATE, int_val)  // matches InsertBatch
+
+#define STRING_IN_LIST_FILTER_TO_PROTOBUF(SLOT_TYPE)                                   \
+  template<>                                                                           \
+  void InListFilterImpl<StringValue, SLOT_TYPE>::ToProtobuf(InListFilterPB* protobuf)  \
+      const {                                                                          \
+    protobuf->set_always_true(always_true_);                                           \
+    if (!always_true_) { /* An always-true filter carries no values. */                \
+      protobuf->set_contains_null(contains_null_);                                     \
+      for (const StringValue& str : values_.values) {                                  \
+        protobuf->add_value()->set_string_val(str.ptr, str.len);                       \
+      }                                                                                \
+    }                                                                                  \
+  }
+
+STRING_IN_LIST_FILTER_TO_PROTOBUF(TYPE_STRING)
+STRING_IN_LIST_FILTER_TO_PROTOBUF(TYPE_VARCHAR)
+STRING_IN_LIST_FILTER_TO_PROTOBUF(TYPE_CHAR)
+
+/// Appends every stored DATE value to 'in_list' as an orc::Literal tagged with
+/// orc::PredicateDataType::DATE.
+template<>
+void InListFilterImpl<int32_t, TYPE_DATE>::ToOrcLiteralList(
+    vector<orc::Literal>* in_list) {
+  for (int32_t d : values_) in_list->emplace_back(orc::PredicateDataType::DATE, d);
+}
+
+// Defines ToOrcLiteralList() for a string slot type: every stored value is appended to
+// 'in_list' as an orc::Literal built from its pointer and length.
+#define STRING_IN_LIST_FILTER_TO_ORC_LITERAL_LIST(SLOT_TYPE)                           \
+  template<>                                                                           \
+  void InListFilterImpl<StringValue, SLOT_TYPE>::ToOrcLiteralList(                     \
+      vector<orc::Literal>* in_list) {                                                 \
+    for (const StringValue& s : values_.values) in_list->emplace_back(s.ptr, s.len);   \
+  }
+
+STRING_IN_LIST_FILTER_TO_ORC_LITERAL_LIST(TYPE_STRING)
+STRING_IN_LIST_FILTER_TO_ORC_LITERAL_LIST(TYPE_VARCHAR)
+STRING_IN_LIST_FILTER_TO_ORC_LITERAL_LIST(TYPE_CHAR)
+
 } // namespace impala
diff --git a/be/src/util/in-list-filter.h b/be/src/util/in-list-filter.h
index 5a209fef9..cb2e7c6b2 100644
--- a/be/src/util/in-list-filter.h
+++ b/be/src/util/in-list-filter.h
@@ -17,17 +17,23 @@
 
 #pragma once
 
+#include <boost/unordered_set.hpp>
+#include <orc/sargs/Literal.hh>
+
 #include "gen-cpp/ImpalaInternalService_types.h"
 #include "impala-ir/impala-ir-functions.h"
 #include "runtime/date-value.h"
 #include "runtime/decimal-value.h"
 #include "runtime/string-buffer.h"
 #include "runtime/string-value.h"
+#include "runtime/string-value.inline.h"
 #include "runtime/timestamp-value.h"
 #include "runtime/types.h"
 
 namespace impala {
 
+typedef google::protobuf::RepeatedPtrField<ColumnValuePB> ColumnValueBatchPB;
+
 class InListFilter {
  public:
   /// Upper bound of total length for the string set to avoid it explodes.
@@ -35,63 +41,165 @@ class InListFilter {
   /// configurable.
   const static uint32_t STRING_SET_MAX_TOTAL_LENGTH = 4 * 1024 * 1024;
 
-  InListFilter(ColumnType type, uint32_t entry_limit, bool contains_null = false);
-  ~InListFilter() {}
-  void Close() {}
+  InListFilter(uint32_t entry_limit, bool contains_null = false);
+  virtual ~InListFilter() {}
+  virtual void Close() {}
 
   /// Add a new value to the list.
-  void Insert(const void* val);
+  virtual void Insert(const void* val) = 0;
+
+  /// Materialize filter values by copying any values stored by filters into memory owned
+  /// by the filter. Filters may assume that the memory for Insert()-ed values stays valid
+  /// until this is called. Invoked after inserting a batch.
+  virtual void MaterializeValues() {}
 
   std::string DebugString() const noexcept;
 
   bool ContainsNull() { return contains_null_; }
   bool AlwaysTrue() { return always_true_; }
-  bool AlwaysFalse();
+  bool AlwaysFalse() {
+    return !always_true_ && !contains_null_ && total_entries_ == 0;
+  }
   static bool AlwaysFalse(const InListFilterPB& filter);
 
   /// Makes this filter always return true.
   void SetAlwaysTrue() { always_true_ = true; }
 
-  bool Find(void* val, const ColumnType& col_type) const noexcept;
-  int NumItems() const noexcept;
+  virtual bool Find(const void* val, const ColumnType& col_type) const noexcept = 0;
+  int NumItems() const noexcept {
+    return total_entries_ + (contains_null_ ? 1 : 0);
+  }
+
+  /// Fills the orc::Literal vector with set values (excluding NULL).
+  virtual void ToOrcLiteralList(std::vector<orc::Literal>* in_list) = 0;
 
   /// Returns a new InListFilter with the given type, allocated from 'mem_tracker'.
-  static InListFilter* Create(ColumnType type, uint32_t entry_limit, ObjectPool* pool);
+  static InListFilter* Create(ColumnType type, uint32_t entry_limit, ObjectPool* pool,
+      MemTracker* mem_tracker, bool contains_null = false);
 
   /// Returns a new InListFilter created from the protobuf representation, allocated from
   /// 'mem_tracker'.
   static InListFilter* Create(const InListFilterPB& protobuf, ColumnType type,
-      uint32_t entry_limit, ObjectPool* pool);
+      uint32_t entry_limit, ObjectPool* pool, MemTracker* mem_tracker);
 
   /// Converts 'filter' to its corresponding Protobuf representation.
   /// If the first argument is NULL, it is interpreted as a complete filter which
   /// contains all elements, i.e. always true.
   static void ToProtobuf(const InListFilter* filter, InListFilterPB* protobuf);
 
-  /// Returns the LLVM_CLASS_NAME for this base class 'InListFilter'.
-  static const char* LLVM_CLASS_NAME;
-
   /// Return a debug string for 'filter'
   static std::string DebugString(const InListFilterPB& filter);
   /// Return a debug string for the list of the 'filter'
   static std::string DebugStringOfList(const InListFilterPB& filter);
 
- private:
+ protected:
   friend class HdfsOrcScanner;
-  void ToProtobuf(InListFilterPB* protobuf) const;
 
+  virtual void ToProtobuf(InListFilterPB* protobuf) const = 0;
+
+  /// Insert a batch of protobuf values.
+  virtual void InsertBatch(const ColumnValueBatchPB& batch) = 0;
+
+  uint32_t entry_limit_;
+  uint32_t total_entries_ = 0;
   bool always_true_;
   bool contains_null_;
-  PrimitiveType type_;
-  // Type len for CHAR type.
+};
+
+/// Generic IN-list filter whose values of type T are kept in a std::unordered_set<T>.
+template<typename T, PrimitiveType SLOT_TYPE>
+class InListFilterImpl : public InListFilter {
+ public:
+  InListFilterImpl(uint32_t entry_limit, bool contains_null = false):
+      InListFilter(entry_limit, contains_null) {}
+  ~InListFilterImpl() {}
+
+  void Insert(const void* val) override;
+  void InsertBatch(const ColumnValueBatchPB& batch) override;
+  bool Find(const void* val, const ColumnType& col_type) const noexcept override;
+
+  void ToProtobuf(InListFilterPB* protobuf) const override;
+  void ToOrcLiteralList(std::vector<orc::Literal>* in_list) override {
+    for (const T& item : values_) in_list->emplace_back(static_cast<int64_t>(item));
+  }
+
+  /// Drops every stored value and leaves the filter in the always-true state.
+  inline void Reset() {
+    values_.clear();
+    total_entries_ = 0;
+    contains_null_ = false;
+    always_true_ = true;
+  }
+
+  inline static T GetValue(const void* val) { return *reinterpret_cast<const T*>(val); }
+ private:
+  std::unordered_set<T> values_;
+};
+
+/// Set of StringValues that also tracks the summed length of its distinct members.
+struct StringSetWithTotalLen {
+  boost::unordered_set<StringValue> values;
+  uint32_t total_len = 0;
+
+  /// Adds 'v'; returns true and grows total_len only if 'v' was not already present.
+  inline bool insert(StringValue v) {
+    const bool is_new = values.insert(v).second;
+    if (is_new) total_len += v.len;
+    return is_new;
+  }
+
+  inline bool insert(const string& s) {
+    const bool is_new = values.emplace(s).second;
+    if (is_new) total_len += s.length();
+    return is_new;
+  }
+
+  inline bool find(StringValue v) const { return values.find(v) != values.end(); }
+
+  inline void clear() {
+    total_len = 0;
+    values.clear();
+  }
+};
+
+template<PrimitiveType SLOT_TYPE>
+class InListFilterImpl<StringValue, SLOT_TYPE> : public InListFilter {
+ public:
+  InListFilterImpl(ColumnType type, uint32_t entry_limit, MemTracker* mem_tracker,
+      bool contains_null = false):
+      InListFilter(entry_limit, contains_null), mem_pool_(mem_tracker),
+      // Only CHAR uses a type length; every other slot type gets a defined 0.
+      type_len_(SLOT_TYPE == TYPE_CHAR ? type.len : 0) {}
+  ~InListFilterImpl() {}
+  void Close() override { mem_pool_.FreeAll(); }
+
+  void Insert(const void* val) override;
+  void InsertBatch(const ColumnValueBatchPB& batch) override;
+  void MaterializeValues() override;
+  bool Find(const void* val, const ColumnType& col_type) const noexcept override;
+
+  void ToProtobuf(InListFilterPB* protobuf) const override;
+  void ToOrcLiteralList(std::vector<orc::Literal>* in_list) override;
+
+  inline void Reset() {
+    always_true_ = true;
+    contains_null_ = false;
+    values_.clear();
+    newly_inserted_values_.clear();
+    total_entries_ = 0;
+  }
+
+  inline static StringValue GetValue(const void* val, int char_type_len) {
+    return *reinterpret_cast<const StringValue*>(val);  // char_type_len is unused here
+  }
+ private:
+  MemPool mem_pool_;
+  StringSetWithTotalLen values_;
+  /// Temp set used to insert new values. They will be transferred to values_ in
+  /// MaterializeValues().
+  StringSetWithTotalLen newly_inserted_values_;
+  /// Type len for CHAR type.
   int type_len_;
-  /// Value set for all numeric types. Use int64_t for simplicity.
-  /// TODO(IMPALA-11141): use the exact type to save memory space.
-  std::unordered_set<int64_t> values_;
-  /// Value set for all string types.
-  std::unordered_set<std::string> str_values_;
-  uint32_t str_total_size_ = 0;
-  uint32_t entry_limit_;
 };
 }
 
diff --git a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
index bd38c8a6c..a9f526fac 100644
--- a/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
+++ b/fe/src/main/java/org/apache/impala/planner/RuntimeFilterGenerator.java
@@ -681,11 +681,12 @@ public final class RuntimeFilterGenerator {
     private void calculateFilterSize(FilterSizeLimits filterSizeLimits) {
       if (type_ == TRuntimeFilterType.MIN_MAX) return;
       if (type_ == TRuntimeFilterType.IN_LIST) {
-        if (srcExpr_.getType().isStringType()) {
+        Type colType = srcExpr_.getType();
+        if (colType.isStringType()) {
           filterSizeBytes_ = IN_LIST_FILTER_STRING_SET_MAX_TOTAL_LENGTH;
         } else {
-          // We currently use int64_t(8 bytes) as entry items for all numeric types.
-          filterSizeBytes_ = filterSizeLimits.inListFilterEntryLimit * 8;
+          filterSizeBytes_ =
+              filterSizeLimits.inListFilterEntryLimit * colType.getSlotSize();
         }
         return;
       }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
index 87ae8c46a..a8d13fbb6 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-query-options.test
@@ -808,7 +808,7 @@ PLAN-ROOT SINK
 |  in pipelines: 03(GETNEXT)
 |
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
-Per-Host Resources: mem-estimate=33.96MB mem-reservation=1.96MB thread-reservation=2 runtime-filters-memory=8.00KB
+Per-Host Resources: mem-estimate=33.96MB mem-reservation=1.96MB thread-reservation=2 runtime-filters-memory=4.00KB
 03:AGGREGATE
 |  output: count(*)
 |  mem-estimate=16.00KB mem-reservation=0B spill-buffer=2.00MB thread-reservation=0