You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2016/10/24 22:48:03 UTC
[1/2] incubator-impala git commit: Remove unused Bitmap code.
Repository: incubator-impala
Updated Branches:
refs/heads/master ff6b450ad -> 61fcb4897
Remove unused Bitmap code.
These methods and code paths have been made obsolete by the switch to
Bloom filters.
Change-Id: I95fcaaa40243999800c2ec2ead5b3479d66a63e7
Reviewed-on: http://gerrit.cloudera.org:8080/4801
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0fbb5b7e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0fbb5b7e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0fbb5b7e
Branch: refs/heads/master
Commit: 0fbb5b7e71e55346c8b97ec143854dba0088f124
Parents: ff6b450
Author: Jim Apple <jb...@cloudera.com>
Authored: Sat Oct 22 10:42:51 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Oct 24 17:53:33 2016 +0000
----------------------------------------------------------------------
be/src/benchmarks/bitmap-benchmark.cc | 6 +--
be/src/exec/hash-table.h | 4 +-
be/src/exec/nested-loop-join-node.cc | 12 ++---
be/src/util/bitmap-test.cc | 82 +++++-------------------------
be/src/util/bitmap.cc | 8 +--
be/src/util/bitmap.h | 42 ++-------------
6 files changed, 32 insertions(+), 122 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0fbb5b7e/be/src/benchmarks/bitmap-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bitmap-benchmark.cc b/be/src/benchmarks/bitmap-benchmark.cc
index 26c51d8..5c1c9e6 100644
--- a/be/src/benchmarks/bitmap-benchmark.cc
+++ b/be/src/benchmarks/bitmap-benchmark.cc
@@ -110,7 +110,7 @@ struct TestData {
void Benchmark(int batch_size, void* data) {
TestData* d = reinterpret_cast<TestData*>(data);
for (int i = 0; i < batch_size; ++i) {
- d->bm.Set<true>(d->data[i & (d->data.size() - 1)], true);
+ d->bm.Set(d->data[i & (d->data.size() - 1)] % d->bm.num_bits(), true);
}
}
@@ -122,7 +122,7 @@ struct TestData {
TestData(int64_t size)
: bm(size), data (1ull << 20) {
for (size_t i = 0; i < size/2; ++i) {
- bm.Set<true>(MakeNonNegativeRand(), true);
+ bm.Set(MakeNonNegativeRand() % size, true);
}
for (size_t i = 0; i < data.size(); ++i) {
data[i] = MakeNonNegativeRand();
@@ -138,7 +138,7 @@ struct TestData {
void Benchmark(int batch_size, void* data) {
TestData* d = reinterpret_cast<TestData*>(data);
for (int i = 0; i < batch_size; ++i) {
- d->result += d->bm.Get<true>(d->data[i & (d->data.size() - 1)]);
+ d->result += d->bm.Get(d->data[i & (d->data.size() - 1)] % d->bm.num_bits());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0fbb5b7e/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index 404b294..2ebc22f 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -290,11 +290,11 @@ class HashTableCtx {
/// Returns true if the current row is null but nulls are not considered in the current
/// phase (build or probe).
- bool ALWAYS_INLINE IsRowNull() const { return null_bitmap_.Get<false>(CurIdx()); }
+ bool ALWAYS_INLINE IsRowNull() const { return null_bitmap_.Get(CurIdx()); }
/// Record in a bitmap that the current row is null but nulls are not considered in
/// the current phase (build or probe).
- void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set<false>(CurIdx(), true); }
+ void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set(CurIdx(), true); }
/// Returns the hash values of the current row.
uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; }
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0fbb5b7e/be/src/exec/nested-loop-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/nested-loop-join-node.cc b/be/src/exec/nested-loop-join-node.cc
index 0a115c6..6c75797 100644
--- a/be/src/exec/nested-loop-join-node.cc
+++ b/be/src/exec/nested-loop-join-node.cc
@@ -410,7 +410,7 @@ Status NestedLoopJoinNode::GetNextRightSemiJoin(RuntimeState* state,
}
// Check if we already have a match for the build row.
- if (matching_build_rows_->Get<false>(current_build_row_idx_)) {
+ if (matching_build_rows_->Get(current_build_row_idx_)) {
build_row_iterator_.Next();
++current_build_row_idx_;
continue;
@@ -424,7 +424,7 @@ Status NestedLoopJoinNode::GetNextRightSemiJoin(RuntimeState* state,
continue;
}
TupleRow* output_row = output_batch->GetRow(output_batch->AddRow());
- matching_build_rows_->Set<false>(current_build_row_idx_, true);
+ matching_build_rows_->Set(current_build_row_idx_, true);
output_batch->CopyRow(build_row_iterator_.GetRow(), output_row);
build_row_iterator_.Next();
++current_build_row_idx_;
@@ -461,7 +461,7 @@ Status NestedLoopJoinNode::GetNextRightAntiJoin(RuntimeState* state,
RETURN_IF_ERROR(QueryMaintenance(state));
}
- if (matching_build_rows_->Get<false>(current_build_row_idx_)) {
+ if (matching_build_rows_->Get(current_build_row_idx_)) {
build_row_iterator_.Next();
++current_build_row_idx_;
continue;
@@ -469,7 +469,7 @@ Status NestedLoopJoinNode::GetNextRightAntiJoin(RuntimeState* state,
CreateOutputRow(semi_join_staging_row_, current_probe_row_,
build_row_iterator_.GetRow());
if (EvalConjuncts(join_conjunct_ctxs, num_join_ctxs, semi_join_staging_row_)) {
- matching_build_rows_->Set<false>(current_build_row_idx_, true);
+ matching_build_rows_->Set(current_build_row_idx_, true);
}
build_row_iterator_.Next();
++current_build_row_idx_;
@@ -548,7 +548,7 @@ Status NestedLoopJoinNode::ProcessUnmatchedBuildRows(
RETURN_IF_ERROR(QueryMaintenance(state));
}
- if (matching_build_rows_->Get<false>(current_build_row_idx_)) {
+ if (matching_build_rows_->Get(current_build_row_idx_)) {
build_row_iterator_.Next();
++current_build_row_idx_;
continue;
@@ -612,7 +612,7 @@ Status NestedLoopJoinNode::FindBuildMatches(
if (!EvalConjuncts(join_conjunct_ctxs, num_join_ctxs, output_row)) continue;
matched_probe_ = true;
if (matching_build_rows_ != NULL) {
- matching_build_rows_->Set<false>(current_build_row_idx_ - 1, true);
+ matching_build_rows_->Set(current_build_row_idx_ - 1, true);
}
if (!EvalConjuncts(conjunct_ctxs, num_ctxs, output_row)) continue;
VLOG_ROW << "match row: " << PrintRow(output_row, row_desc());
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0fbb5b7e/be/src/util/bitmap-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bitmap-test.cc b/be/src/util/bitmap-test.cc
index 238b5fc..c0a3899 100644
--- a/be/src/util/bitmap-test.cc
+++ b/be/src/util/bitmap-test.cc
@@ -30,14 +30,14 @@ namespace impala {
void CreateRandomBitmap(Bitmap* bitmap) {
for (int64_t i = 0; i < bitmap->num_bits(); ++i) {
- bitmap->Set<false>(i, rand() % 2 == 0);
+ bitmap->Set(i, rand() % 2 == 0);
}
}
// Returns true if all the bits in the bitmap are equal to 'value'.
bool CheckAll(const Bitmap& bitmap, const bool value) {
for (int64_t i = 0; i < bitmap.num_bits(); ++i) {
- if (bitmap.Get<false>(i) != value) return false;
+ if (bitmap.Get(i) != value) return false;
}
return true;
}
@@ -70,24 +70,6 @@ TEST(Bitmap, SetAllTest) {
EXPECT_TRUE(CheckAll(bm, false));
}
-TEST(Bitmap, AndTest) {
- Bitmap bm(1024);
- CreateRandomBitmap(&bm);
- Bitmap bm_zeros(1024);
- bm_zeros.SetAllBits(false);
- bm.And(&bm_zeros);
- EXPECT_TRUE(CheckAll(bm, false));
-}
-
-TEST(Bitmap, OrTest) {
- Bitmap bm(1024);
- CreateRandomBitmap(&bm);
- Bitmap bm_ones(1024);
- bm_ones.SetAllBits(true);
- bm.Or(&bm_ones);
- EXPECT_TRUE(CheckAll(bm, true));
-}
-
// Regression test for IMPALA-2155.
TEST(Bitmap, SetGetTest) {
Bitmap bm(1024);
@@ -96,36 +78,16 @@ TEST(Bitmap, SetGetTest) {
// to 0 and 1.
for (int64_t bit_idx = 0; bit_idx < 63; ++bit_idx) {
for (int64_t i = 0; i < 4; ++i) {
- bm.Set<false>((1 << (6 + i)) + bit_idx, (i + bit_idx) % 2 == 0);
+ bm.Set((1 << (6 + i)) + bit_idx, (i + bit_idx) % 2 == 0);
}
}
for (int64_t bit_idx = 0; bit_idx < 63; ++bit_idx) {
for (int64_t i = 0; i < 4; ++i) {
- EXPECT_EQ(bm.Get<false>((1 << (6 + i)) + bit_idx), (i + bit_idx) % 2 == 0);
+ EXPECT_EQ(bm.Get((1 << (6 + i)) + bit_idx), (i + bit_idx) % 2 == 0);
}
}
}
-TEST(Bitmap, SetGetModTest) {
- Bitmap bm(256);
- bm.SetAllBits(false);
- for (int64_t bit_idx = 0; bit_idx < 1024; ++bit_idx) {
- bm.Set<true>(bit_idx, true);
- EXPECT_TRUE(bm.Get<true>(bit_idx));
- bm.Set<true>(bit_idx, false);
- EXPECT_FALSE(bm.Get<true>(bit_idx));
- }
-
- bm.SetAllBits(false);
- EXPECT_TRUE(CheckAll(bm, false));
- for (int64_t bit_idx = 0; bit_idx < 1024; ++bit_idx) {
- bm.Set<true>(bit_idx, bit_idx % 2 == 0);
- }
- for (int64_t bit_idx = 0; bit_idx < 1024; ++bit_idx) {
- EXPECT_EQ(bm.Get<true>(bit_idx), bit_idx % 2 == 0);
- }
-}
-
/// Regression test for IMPALA-2307.
TEST(Bitmap, OverflowTest) {
Bitmap bm(64);
@@ -133,36 +95,20 @@ TEST(Bitmap, OverflowTest) {
int64_t bit_idx = 45;
int64_t ovr_idx = 13;
- bm.Set<false>(bit_idx, true);
- EXPECT_FALSE(bm.Get<false>(ovr_idx));
- EXPECT_TRUE(bm.Get<false>(bit_idx));
-
- bm.SetAllBits(false);
- bm.Set<false>(ovr_idx, true);
- EXPECT_FALSE(bm.Get<false>(bit_idx));
- EXPECT_TRUE(bm.Get<false>(ovr_idx));
-
- bm.SetAllBits(false);
- bm.Set<false>(ovr_idx, true);
- bm.Set<false>(bit_idx, false);
- EXPECT_TRUE(bm.Get<false>(ovr_idx));
- EXPECT_FALSE(bm.Get<false>(bit_idx));
-
- bm.SetAllBits(false);
- bm.Set<true>(bit_idx, true);
- EXPECT_FALSE(bm.Get<true>(ovr_idx));
- EXPECT_TRUE(bm.Get<true>(bit_idx));
+ bm.Set(bit_idx, true);
+ EXPECT_FALSE(bm.Get(ovr_idx));
+ EXPECT_TRUE(bm.Get(bit_idx));
bm.SetAllBits(false);
- bm.Set<true>(ovr_idx, true);
- EXPECT_FALSE(bm.Get<true>(bit_idx));
- EXPECT_TRUE(bm.Get<true>(ovr_idx));
+ bm.Set(ovr_idx, true);
+ EXPECT_FALSE(bm.Get(bit_idx));
+ EXPECT_TRUE(bm.Get(ovr_idx));
bm.SetAllBits(false);
- bm.Set<true>(ovr_idx, true);
- bm.Set<true>(bit_idx, false);
- EXPECT_TRUE(bm.Get<true>(ovr_idx));
- EXPECT_FALSE(bm.Get<true>(bit_idx));
+ bm.Set(ovr_idx, true);
+ bm.Set(bit_idx, false);
+ EXPECT_TRUE(bm.Get(ovr_idx));
+ EXPECT_FALSE(bm.Get(bit_idx));
}
/// Test that bitmap memory usage calculation is correct.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0fbb5b7e/be/src/util/bitmap.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bitmap.cc b/be/src/util/bitmap.cc
index b1f54bc..504c919 100644
--- a/be/src/util/bitmap.cc
+++ b/be/src/util/bitmap.cc
@@ -23,21 +23,21 @@
using namespace impala;
-string Bitmap::DebugString(bool print_bits) {
+string Bitmap::DebugString(bool print_bits) const {
int64_t words = BitUtil::RoundUp(num_bits_, 64) / 64;
stringstream ss;
ss << "Size (" << num_bits_ << ") words (" << words << ") ";
if (print_bits) {
for (int i = 0; i < num_bits(); ++i) {
- if (Get<false>(i)) {
+ if (Get(i)) {
ss << "1";
} else {
ss << "0";
}
}
} else {
- for (vector<uint64_t>::iterator it = buffer_.begin(); it != buffer_.end(); ++it) {
- ss << *it << ".";
+ for (auto v : buffer_) {
+ ss << v << ".";
}
}
ss << endl;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0fbb5b7e/be/src/util/bitmap.h
----------------------------------------------------------------------
diff --git a/be/src/util/bitmap.h b/be/src/util/bitmap.h
index 1ff8050..5f02f60 100644
--- a/be/src/util/bitmap.h
+++ b/be/src/util/bitmap.h
@@ -37,10 +37,6 @@ class Bitmap {
num_bits_ = num_bits;
}
- Bitmap(const uint64_t* from_buf, int64_t num_bits) {
- SetFromBuffer(from_buf, num_bits);
- }
-
/// Resize bitmap and set all bits to zero.
void Reset(int64_t num_bits) {
DCHECK_GE(num_bits, 0);
@@ -49,30 +45,17 @@ class Bitmap {
SetAllBits(false);
}
- void SetFromBuffer(const uint64_t* from_buf, int64_t num_bits) {
- buffer_.resize(BitUtil::RoundUpNumi64(num_bits));
- for (int i = 0; i < buffer_.size(); ++i) {
- buffer_[i] = from_buf[i];
- }
- num_bits_ = num_bits;
- }
-
/// Compute memory usage of a bitmap, not including the Bitmap object itself.
static int64_t MemUsage(int64_t num_bits) {
DCHECK_GE(num_bits, 0);
return BitUtil::RoundUpNumi64(num_bits) * sizeof(int64_t);
}
- static int64_t DefaultHashSeed() { return 1234; }
-
/// Compute memory usage of this bitmap, not including the Bitmap object itself.
int64_t MemUsage() const { return MemUsage(num_bits_); }
- /// Sets the bit at 'bit_index' to v. If mod is true, this
- /// function will first mod the bit_index by the bitmap size.
- template<bool mod>
+ /// Sets the bit at 'bit_index' to v.
void Set(int64_t bit_index, bool v) {
- if (mod) bit_index &= (num_bits() - 1);
int64_t word_index = bit_index >> NUM_OFFSET_BITS;
bit_index &= BIT_INDEX_MASK;
DCHECK_LT(word_index, buffer_.size());
@@ -83,33 +66,14 @@ class Bitmap {
}
}
- /// Returns true if the bit at 'bit_index' is set. If mod is true, this
- /// function will first mod the bit_index by the bitmap size.
- template<bool mod>
+ /// Returns true if the bit at 'bit_index' is set.
bool Get(int64_t bit_index) const {
- if (mod) bit_index &= (num_bits() - 1);
int64_t word_index = bit_index >> NUM_OFFSET_BITS;
bit_index &= BIT_INDEX_MASK;
DCHECK_LT(word_index, buffer_.size());
return (buffer_[word_index] & (1LL << bit_index)) != 0;
}
- /// Bitwise ANDs the src bitmap into this one.
- void And(const Bitmap* src) {
- DCHECK_EQ(num_bits(), src->num_bits());
- for (int i = 0; i < buffer_.size(); ++i) {
- buffer_[i] &= src->buffer_[i];
- }
- }
-
- /// Bitwise ORs the src bitmap into this one.
- void Or(const Bitmap* src) {
- DCHECK_EQ(num_bits(), src->num_bits());
- for (int i = 0; i < buffer_.size(); ++i) {
- buffer_[i] |= src->buffer_[i];
- }
- }
-
void SetAllBits(bool b) {
memset(&buffer_[0], 255 * b, buffer_.size() * sizeof(uint64_t));
}
@@ -117,7 +81,7 @@ class Bitmap {
int64_t num_bits() const { return num_bits_; }
/// If 'print_bits' prints 0/1 per bit, otherwise it prints the int64_t value.
- std::string DebugString(bool print_bits);
+ std::string DebugString(bool print_bits) const;
private:
std::vector<uint64_t> buffer_;
[2/2] incubator-impala git commit: IMPALA-4300: Speed up
BloomFilter::Or with SIMD
Posted by he...@apache.org.
IMPALA-4300: Speed up BloomFilter::Or with SIMD
The previous code was not written in a way that GCC could
auto-vectorize it. Manually vectorizing speeds up BloomFilter::Or by
up to 184x.
Change-Id: I840799d9cfb81285c796e2abfe2029bb869b0f67
Reviewed-on: http://gerrit.cloudera.org:8080/4813
Reviewed-by: Jim Apple <jb...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/61fcb489
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/61fcb489
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/61fcb489
Branch: refs/heads/master
Commit: 61fcb489745f3f0b3f1abbf9fbf666a29a6363de
Parents: 0fbb5b7
Author: Jim Apple <jb...@cloudera.com>
Authored: Fri Oct 21 07:46:42 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Oct 24 18:07:25 2016 +0000
----------------------------------------------------------------------
be/src/benchmarks/bloom-filter-benchmark.cc | 198 +++++++++++++++--------
be/src/testutil/mem-util.h | 2 +
be/src/util/bloom-filter.cc | 49 +++++-
be/src/util/cpu-info.cc | 1 +
be/src/util/cpu-info.h | 3 +-
5 files changed, 178 insertions(+), 75 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61fcb489/be/src/benchmarks/bloom-filter-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bloom-filter-benchmark.cc b/be/src/benchmarks/bloom-filter-benchmark.cc
index 1aa0619..d9019c8 100644
--- a/be/src/benchmarks/bloom-filter-benchmark.cc
+++ b/be/src/benchmarks/bloom-filter-benchmark.cc
@@ -30,13 +30,14 @@
using namespace std;
using namespace impala;
-// Tests Bloom filter performance on four tasks:
+// Tests Bloom filter performance on:
//
// 1. Construct/destruct pairs
// 2. Inserts
// 3. Lookups when the item is present
// 4. Lookups when the item is absent (this is theoretically faster than when the item is
// present in some Bloom filter variants)
+// 5. Unions
//
// As in bloom-filter.h, ndv refers to the number of unique items inserted into a filter
// and fpp is the probability of false positives.
@@ -46,89 +47,116 @@ using namespace impala;
// initialize: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// ndv 10k fpp 10.0% 7.05e+03 7.27e+03 7.34e+03 1X 1X 1X
-// ndv 10k fpp 1.0% 3.79e+03 3.93e+03 3.96e+03 0.538X 0.541X 0.54X
-// ndv 10k fpp 0.1% 1.39e+03 1.42e+03 1.44e+03 0.198X 0.196X 0.196X
-// ndv 1000k fpp 10.0% 4.62 4.78 4.81 0.000655X 0.000658X 0.000655X
-// ndv 1000k fpp 1.0% 2.49 2.55 2.6 0.000354X 0.000351X 0.000354X
-// ndv 1000k fpp 0.1% 2.45 2.55 2.6 0.000347X 0.000351X 0.000354X
-// ndv 100000k fpp 10.0% 0.035 0.0358 0.037 4.96e-06X 4.93e-06X 5.04e-06X
-// ndv 100000k fpp 1.0% 0.0347 0.0361 0.0372 4.93e-06X 4.96e-06X 5.06e-06X
-// ndv 100000k fpp 0.1% 0.0176 0.0181 0.0186 2.5e-06X 2.49e-06X 2.53e-06X
+// ndv 10k fpp 10.0% 5.89e+03 5.98e+03 6.03e+03 1X 1X 1X
+// ndv 10k fpp 1.0% 3.22e+03 3.25e+03 3.27e+03 0.546X 0.543X 0.542X
+// ndv 10k fpp 0.1% 1.13e+03 1.17e+03 1.18e+03 0.191X 0.195X 0.195X
+// ndv 1000k fpp 10.0% 3.85 3.93 3.93 0.000654X 0.000657X 0.000652X
+// ndv 1000k fpp 1.0% 2.04 2.12 2.12 0.000346X 0.000354X 0.000351X
+// ndv 1000k fpp 0.1% 2.04 2.12 2.12 0.000346X 0.000354X 0.000351X
+// ndv 100000k fpp 10.0% 0.0281 0.029 0.0294 4.77e-06X 4.85e-06X 4.87e-06X
+// ndv 100000k fpp 1.0% 0.0284 0.029 0.0298 4.82e-06X 4.85e-06X 4.93e-06X
+// ndv 100000k fpp 0.1% 0.0144 0.0147 0.0149 2.44e-06X 2.47e-06X 2.47e-06X
//
// With AVX2:
//
// insert: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// ndv 10k fpp 10.0% 2.03e+05 2.05e+05 2.08e+05 1X 1X 1X
-// ndv 10k fpp 1.0% 2.03e+05 2.06e+05 2.08e+05 0.997X 1X 1X
-// ndv 10k fpp 0.1% 2.03e+05 2.05e+05 2.07e+05 0.997X 0.998X 0.997X
-// ndv 1000k fpp 10.0% 1.82e+05 1.87e+05 1.89e+05 0.896X 0.91X 0.907X
-// ndv 1000k fpp 1.0% 1.49e+05 1.53e+05 1.56e+05 0.731X 0.747X 0.75X
-// ndv 1000k fpp 0.1% 1.79e+05 1.82e+05 1.83e+05 0.881X 0.886X 0.882X
-// ndv 100000k fpp 10.0% 4.08e+04 4.49e+04 5.44e+04 0.201X 0.219X 0.262X
-// ndv 100000k fpp 1.0% 3.94e+04 4.4e+04 5.04e+04 0.194X 0.214X 0.242X
-// ndv 100000k fpp 0.1% 4.08e+04 4.48e+04 5.68e+04 0.201X 0.218X 0.273X
+// ndv 10k fpp 10.0% 1.17e+05 1.23e+05 1.25e+05 1X 1X 1X
+// ndv 10k fpp 1.0% 1.17e+05 1.24e+05 1.25e+05 1X 1X 1X
+// ndv 10k fpp 0.1% 1.2e+05 1.23e+05 1.24e+05 1.02X 0.996X 0.991X
+// ndv 1000k fpp 10.0% 1.1e+05 1.18e+05 1.2e+05 0.944X 0.959X 0.96X
+// ndv 1000k fpp 1.0% 1.11e+05 1.16e+05 1.17e+05 0.954X 0.938X 0.934X
+// ndv 1000k fpp 0.1% 9.73e+04 1.16e+05 1.17e+05 0.834X 0.937X 0.936X
+// ndv 100000k fpp 10.0% 2.96e+04 4.19e+04 5.44e+04 0.254X 0.34X 0.436X
+// ndv 100000k fpp 1.0% 2.92e+04 3.81e+04 4.89e+04 0.25X 0.308X 0.391X
+// ndv 100000k fpp 0.1% 2.44e+04 3.28e+04 4.31e+04 0.209X 0.266X 0.345X
//
// find: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// present ndv 10k fpp 10.0% 2.48e+05 2.51e+05 2.53e+05 1X 1X 1X
-// absent ndv 10k fpp 10.0% 2.47e+05 2.52e+05 2.55e+05 0.995X 1X 1.01X
-// present ndv 10k fpp 1.0% 2.49e+05 2.52e+05 2.55e+05 1X 1.01X 1.01X
-// absent ndv 10k fpp 1.0% 2.47e+05 2.53e+05 2.56e+05 0.997X 1.01X 1.01X
-// present ndv 10k fpp 0.1% 2.49e+05 2.53e+05 2.54e+05 1X 1.01X 1.01X
-// absent ndv 10k fpp 0.1% 2.47e+05 2.53e+05 2.56e+05 0.997X 1.01X 1.01X
-// present ndv 1000k fpp 10.0% 1.98e+05 2.04e+05 2.06e+05 0.8X 0.814X 0.812X
-// absent ndv 1000k fpp 10.0% 2.01e+05 2.07e+05 2.1e+05 0.808X 0.826X 0.829X
-// present ndv 1000k fpp 1.0% 1.83e+05 1.95e+05 2.02e+05 0.737X 0.78X 0.798X
-// absent ndv 1000k fpp 1.0% 2.01e+05 2.04e+05 2.08e+05 0.808X 0.815X 0.82X
-// present ndv 1000k fpp 0.1% 1.96e+05 2.01e+05 2.03e+05 0.788X 0.8X 0.801X
-// absent ndv 1000k fpp 0.1% 2e+05 2.05e+05 2.07e+05 0.808X 0.817X 0.818X
-// present ndv 100000k fpp 10.0% 4.6e+04 5.09e+04 6.08e+04 0.185X 0.203X 0.24X
-// absent ndv 100000k fpp 10.0% 4.11e+04 4.36e+04 4.53e+04 0.166X 0.174X 0.179X
-// present ndv 100000k fpp 1.0% 4.55e+04 4.96e+04 6.19e+04 0.184X 0.198X 0.245X
-// absent ndv 100000k fpp 1.0% 3.83e+04 4.15e+04 4.69e+04 0.154X 0.166X 0.186X
-// present ndv 100000k fpp 0.1% 4.73e+04 5.43e+04 6.58e+04 0.191X 0.217X 0.26X
-// absent ndv 100000k fpp 0.1% 3.77e+04 4.07e+04 4.37e+04 0.152X 0.163X 0.173X
+// present ndv 10k fpp 10.0% 1.16e+05 1.17e+05 1.18e+05 1X 1X 1X
+// absent ndv 10k fpp 10.0% 1.16e+05 1.17e+05 1.18e+05 1X 1X 0.998X
+// present ndv 10k fpp 1.0% 1.15e+05 1.17e+05 1.18e+05 0.988X 1X 0.999X
+// absent ndv 10k fpp 1.0% 1.14e+05 1.17e+05 1.19e+05 0.978X 1X 1X
+// present ndv 10k fpp 0.1% 1.09e+05 1.17e+05 1.18e+05 0.939X 1X 1X
+// absent ndv 10k fpp 0.1% 1.13e+05 1.17e+05 1.18e+05 0.97X 1X 1X
+// present ndv 1000k fpp 10.0% 1.09e+05 1.13e+05 1.15e+05 0.942X 0.968X 0.97X
+// absent ndv 1000k fpp 10.0% 1.09e+05 1.15e+05 1.16e+05 0.937X 0.982X 0.982X
+// present ndv 1000k fpp 1.0% 9.44e+04 1.12e+05 1.13e+05 0.814X 0.952X 0.951X
+// absent ndv 1000k fpp 1.0% 1.02e+05 1.14e+05 1.15e+05 0.877X 0.973X 0.972X
+// present ndv 1000k fpp 0.1% 1.01e+05 1.11e+05 1.12e+05 0.868X 0.951X 0.949X
+// absent ndv 1000k fpp 0.1% 1.08e+05 1.14e+05 1.15e+05 0.927X 0.975X 0.975X
+// present ndv 100000k fpp 10.0% 3.18e+04 3.94e+04 5.18e+04 0.274X 0.336X 0.437X
+// absent ndv 100000k fpp 10.0% 2.74e+04 3.07e+04 3.49e+04 0.236X 0.262X 0.294X
+// present ndv 100000k fpp 1.0% 3.07e+04 4.29e+04 5.51e+04 0.265X 0.366X 0.465X
+// absent ndv 100000k fpp 1.0% 2.67e+04 2.9e+04 3.25e+04 0.23X 0.248X 0.274X
+// present ndv 100000k fpp 0.1% 2.78e+04 3.88e+04 4.9e+04 0.24X 0.331X 0.413X
+// absent ndv 100000k fpp 0.1% 2.44e+04 2.84e+04 3.02e+04 0.211X 0.242X 0.255X
//
-// Without AVX2:
+// union: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
+// (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+// ndv 10k fpp 10.0% 1.81e+04 1.84e+04 1.86e+04 1X 1X 1X
+// ndv 10k fpp 1.0% 8.25e+03 8.39e+03 8.47e+03 0.455X 0.455X 0.455X
+// ndv 10k fpp 0.1% 4.02e+03 4.31e+03 4.35e+03 0.222X 0.234X 0.234X
+// ndv 1000k fpp 10.0% 105 111 112 0.00577X 0.00603X 0.00602X
+// ndv 1000k fpp 1.0% 45.9 46.4 46.9 0.00253X 0.00252X 0.00252X
+// ndv 1000k fpp 0.1% 46.2 46.6 46.9 0.00255X 0.00253X 0.00252X
+// ndv 100000k fpp 10.0% 0.2 0.2 0.2 1.1e-05X 1.08e-05X 1.07e-05X
+// ndv 100000k fpp 1.0% 0.2 0.2 0.2 1.1e-05X 1.08e-05X 1.07e-05X
+// ndv 100000k fpp 0.1% 0.133 0.143 0.145 7.35e-06X 7.75e-06X 7.79e-06X
+//
+//
+// Without AVX or AVX2:
//
// insert: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// ndv 10k fpp 10.0% 1.25e+05 1.27e+05 1.28e+05 1X 1X 1X
-// ndv 10k fpp 1.0% 1.27e+05 1.29e+05 1.3e+05 1.01X 1.02X 1.02X
-// ndv 10k fpp 0.1% 1.26e+05 1.28e+05 1.3e+05 1X 1.01X 1.01X
-// ndv 1000k fpp 10.0% 1.23e+05 1.25e+05 1.26e+05 0.977X 0.981X 0.985X
-// ndv 1000k fpp 1.0% 1.16e+05 1.22e+05 1.23e+05 0.925X 0.958X 0.958X
-// ndv 1000k fpp 0.1% 1.16e+05 1.22e+05 1.23e+05 0.928X 0.958X 0.957X
-// ndv 100000k fpp 10.0% 3.77e+04 4.06e+04 5.62e+04 0.301X 0.319X 0.438X
-// ndv 100000k fpp 1.0% 3.71e+04 4.06e+04 5.45e+04 0.296X 0.32X 0.425X
-// ndv 100000k fpp 0.1% 3.37e+04 3.68e+04 5.15e+04 0.269X 0.29X 0.401X
+// ndv 10k fpp 10.0% 9.27e+04 9.33e+04 9.4e+04 1X 1X 1X
+// ndv 10k fpp 1.0% 9.43e+04 9.49e+04 9.61e+04 1.02X 1.02X 1.02X
+// ndv 10k fpp 0.1% 9.36e+04 9.5e+04 9.58e+04 1.01X 1.02X 1.02X
+// ndv 1000k fpp 10.0% 8.4e+04 9.49e+04 9.61e+04 0.906X 1.02X 1.02X
+// ndv 1000k fpp 1.0% 7.64e+04 9.34e+04 9.45e+04 0.824X 1X 1.01X
+// ndv 1000k fpp 0.1% 8.24e+04 9.34e+04 9.44e+04 0.888X 1X 1X
+// ndv 100000k fpp 10.0% 3.22e+04 4e+04 5.03e+04 0.347X 0.429X 0.535X
+// ndv 100000k fpp 1.0% 2.77e+04 3.6e+04 4.8e+04 0.298X 0.386X 0.51X
+// ndv 100000k fpp 0.1% 2.54e+04 2.93e+04 4.32e+04 0.274X 0.314X 0.46X
//
// find: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
// (relative) (relative) (relative)
// ---------------------------------------------------------------------------------------------------------
-// present ndv 10k fpp 10.0% 1.6e+05 1.64e+05 1.66e+05 1X 1X 1X
-// absent ndv 10k fpp 10.0% 1.11e+05 1.14e+05 1.15e+05 0.696X 0.697X 0.695X
-// present ndv 10k fpp 1.0% 1.57e+05 1.63e+05 1.64e+05 0.982X 0.994X 0.989X
-// absent ndv 10k fpp 1.0% 1.3e+05 1.33e+05 1.35e+05 0.814X 0.813X 0.812X
-// present ndv 10k fpp 0.1% 1.55e+05 1.58e+05 1.61e+05 0.967X 0.968X 0.969X
-// absent ndv 10k fpp 0.1% 2.26e+05 2.29e+05 2.31e+05 1.41X 1.4X 1.4X
-// present ndv 1000k fpp 10.0% 1.21e+05 1.23e+05 1.25e+05 0.758X 0.753X 0.756X
-// absent ndv 1000k fpp 10.0% 7.6e+04 7.72e+04 7.81e+04 0.475X 0.472X 0.471X
-// present ndv 1000k fpp 1.0% 1.23e+05 1.27e+05 1.28e+05 0.771X 0.773X 0.77X
-// absent ndv 1000k fpp 1.0% 1.19e+05 1.21e+05 1.22e+05 0.744X 0.739X 0.738X
-// present ndv 1000k fpp 0.1% 1.17e+05 1.18e+05 1.2e+05 0.731X 0.724X 0.723X
-// absent ndv 1000k fpp 0.1% 1.13e+05 1.16e+05 1.17e+05 0.707X 0.706X 0.705X
-// present ndv 100000k fpp 10.0% 3.42e+04 3.63e+04 3.9e+04 0.214X 0.222X 0.235X
-// absent ndv 100000k fpp 10.0% 3.6e+04 3.77e+04 3.82e+04 0.225X 0.23X 0.23X
-// present ndv 100000k fpp 1.0% 3.18e+04 3.42e+04 3.57e+04 0.199X 0.209X 0.216X
-// absent ndv 100000k fpp 1.0% 3.63e+04 3.73e+04 3.79e+04 0.227X 0.228X 0.229X
-// present ndv 100000k fpp 0.1% 2.89e+04 3.2e+04 3.33e+04 0.18X 0.196X 0.201X
-// absent ndv 100000k fpp 0.1% 4.56e+04 4.78e+04 4.86e+04 0.285X 0.292X 0.293X
+// present ndv 10k fpp 10.0% 1.3e+05 1.31e+05 1.33e+05 1X 1X 1X
+// absent ndv 10k fpp 10.0% 8.74e+04 8.83e+04 8.92e+04 0.674X 0.673X 0.671X
+// present ndv 10k fpp 1.0% 1.25e+05 1.3e+05 1.31e+05 0.96X 0.991X 0.988X
+// absent ndv 10k fpp 1.0% 1.04e+05 1.06e+05 1.07e+05 0.805X 0.809X 0.807X
+// present ndv 10k fpp 0.1% 1.28e+05 1.3e+05 1.31e+05 0.986X 0.988X 0.984X
+// absent ndv 10k fpp 0.1% 1.69e+05 1.72e+05 1.74e+05 1.3X 1.31X 1.31X
+// present ndv 1000k fpp 10.0% 9.33e+04 9.6e+04 9.69e+04 0.719X 0.732X 0.729X
+// absent ndv 1000k fpp 10.0% 5.99e+04 6.07e+04 6.12e+04 0.462X 0.462X 0.461X
+// present ndv 1000k fpp 1.0% 9.48e+04 1.01e+05 1.02e+05 0.731X 0.768X 0.768X
+// absent ndv 1000k fpp 1.0% 9.49e+04 9.67e+04 9.74e+04 0.731X 0.737X 0.734X
+// present ndv 1000k fpp 0.1% 8.46e+04 9.3e+04 9.41e+04 0.652X 0.709X 0.709X
+// absent ndv 1000k fpp 0.1% 9.05e+04 9.18e+04 9.28e+04 0.697X 0.7X 0.699X
+// present ndv 100000k fpp 10.0% 2.6e+04 2.88e+04 3.11e+04 0.201X 0.22X 0.235X
+// absent ndv 100000k fpp 10.0% 2.88e+04 2.99e+04 3.08e+04 0.222X 0.228X 0.232X
+// present ndv 100000k fpp 1.0% 2.34e+04 2.76e+04 2.91e+04 0.18X 0.21X 0.219X
+// absent ndv 100000k fpp 1.0% 2.86e+04 2.97e+04 3.03e+04 0.22X 0.227X 0.228X
+// present ndv 100000k fpp 0.1% 2.34e+04 2.65e+04 2.81e+04 0.18X 0.202X 0.211X
+// absent ndv 100000k fpp 0.1% 3.73e+04 3.85e+04 3.91e+04 0.287X 0.293X 0.295X
+//
+// union: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
+// (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+// ndv 10k fpp 10.0% 3.06e+03 3.1e+03 3.12e+03 1X 1X 1X
+// ndv 10k fpp 1.0% 1.51e+03 1.55e+03 1.57e+03 0.493X 0.502X 0.503X
+// ndv 10k fpp 0.1% 748 775 782 0.244X 0.25X 0.251X
+// ndv 1000k fpp 10.0% 19.6 20 20.2 0.0064X 0.00646X 0.00647X
+// ndv 1000k fpp 1.0% 9.41 10 10.1 0.00307X 0.00324X 0.00323X
+// ndv 1000k fpp 0.1% 9.9 10 10.1 0.00323X 0.00324X 0.00323X
+// ndv 100000k fpp 10.0% 0.0671 0.0714 0.0725 2.19e-05X 2.3e-05X 2.32e-05X
+// ndv 100000k fpp 1.0% 0.0676 0.0709 0.0719 2.21e-05X 2.29e-05X 2.31e-05X
+// ndv 100000k fpp 0.1% 0.0338 0.035 0.0356 1.1e-05X 1.13e-05X 1.14e-05X
// Make a random uint32_t, avoiding the absent high bit and the low-entropy low bits
// produced by rand().
@@ -221,6 +249,27 @@ void Absent(int batch_size, void* data) {
} // namespace find
+// Benchmark BloomFilter::Or() (the union of two filters)
+namespace either {
+
+struct TestData {
+ explicit TestData(int log_heap_size) {
+ BloomFilter bf(log_heap_size);
+ BloomFilter::ToThrift(&bf, &tbf);
+ }
+
+ TBloomFilter tbf;
+};
+
+void Benchmark(int batch_size, void* data) {
+ TestData* d = reinterpret_cast<TestData*>(data);
+ for (int i = 0; i < batch_size; ++i) {
+ BloomFilter::Or(d->tbf, &d->tbf);
+ }
+}
+
+} // namespace either
+
void RunBenchmarks() {
char name[120];
@@ -254,6 +303,20 @@ void RunBenchmarks() {
}
cout << suite.Measure() << endl;
}
+
+ {
+ Benchmark suite("union");
+ vector<unique_ptr<either::TestData> > testdata;
+ for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
+ for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
+ testdata.emplace_back(
+ new either::TestData(BloomFilter::MinLogSpace(ndv, fpp)));
+ snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv/1000, fpp*100);
+ suite.AddBenchmark(name, either::Benchmark, testdata.back().get());
+ }
+ }
+ cout << suite.Measure() << endl;
+ }
}
int main(int argc, char **argv) {
@@ -277,7 +340,8 @@ int main(int argc, char **argv) {
cout << "With AVX2:" << endl << endl;
RunBenchmarks();
- cout << endl << "Without AVX2:" << endl << endl;
- CpuInfo::TempDisable t(CpuInfo::AVX2);
+ cout << endl << "Without AVX or AVX2:" << endl << endl;
+ CpuInfo::TempDisable t1(CpuInfo::AVX);
+ CpuInfo::TempDisable t2(CpuInfo::AVX2);
RunBenchmarks();
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61fcb489/be/src/testutil/mem-util.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/mem-util.h b/be/src/testutil/mem-util.h
index 78b7b48..b4ce9ea 100644
--- a/be/src/testutil/mem-util.h
+++ b/be/src/testutil/mem-util.h
@@ -21,6 +21,8 @@
#include <cstdint>
#include <cstdlib>
+#include <glog/logging.h>
+
#include "gutil/macros.h"
namespace impala {
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61fcb489/be/src/util/bloom-filter.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bloom-filter.cc b/be/src/util/bloom-filter.cc
index 6fd53f5..2aadc05 100644
--- a/be/src/util/bloom-filter.cc
+++ b/be/src/util/bloom-filter.cc
@@ -154,6 +154,21 @@ bool BloomFilter::BucketFind(
return true;
}
+namespace {
+// Computes out[i] |= in[i] for the byte arrays 'in' and 'out', each 'n' bytes long,
+// using AVX instructions. 'n' must be a multiple of 32 (the size in bytes of one AVX
+// register). 'in' and 'out' need not be aligned: unaligned loads and stores are used.
+void __attribute__((target("avx"))) OrEqualArrayAvx(size_t n, const char* in, char* out) {
+  constexpr size_t REGISTER_SIZE = sizeof(__m256d);
+  DCHECK_EQ(n % REGISTER_SIZE, 0) << "Invalid Bloom Filter directory size";
+  const double* simd_in = reinterpret_cast<const double*>(in);
+  double* simd_out = reinterpret_cast<double*>(out);
+  // The loop index counts doubles, so the bound must be the number of doubles in the
+  // arrays. Bounding it by the number of registers (n / REGISTER_SIZE) would stop after
+  // only the first quarter of the data had been OR'ed.
+  const size_t simd_size = n / sizeof(simd_in[0]);
+  // Each iteration handles one full register, i.e. REGISTER_SIZE / sizeof(double)
+  // doubles.
+  for (size_t i = 0; i < simd_size; i += REGISTER_SIZE / sizeof(simd_in[0])) {
+    _mm256_storeu_pd(simd_out + i,
+        _mm256_or_pd(_mm256_loadu_pd(simd_out + i), _mm256_loadu_pd(simd_in + i)));
+  }
+}
+} //namespace
void BloomFilter::Or(const TBloomFilter& in, TBloomFilter* out) {
DCHECK(out != NULL);
@@ -163,8 +178,29 @@ void BloomFilter::Or(const TBloomFilter& in, TBloomFilter* out) {
out->directory.resize(0);
return;
}
-
- for (int i = 0; i < in.directory.size(); ++i) out->directory[i] |= in.directory[i];
+ // The trivial loop out[i] |= in[i] should auto-vectorize with gcc at -O3, but it is not
+ // written in a way that is very friendly to auto-vectorization. Instead, we manually
+ // vectorize, increasing the speed by up to 184x.
+ //
+ // TODO: Tune gcc flags to auto-vectorize the trivial loop instead of hand-vectorizing
+ // it. This might not be possible.
+ if (CpuInfo::IsSupported(CpuInfo::AVX)) {
+ OrEqualArrayAvx(in.directory.size(), &in.directory[0], &out->directory[0]);
+ } else {
+ const __m128i* simd_in = reinterpret_cast<const __m128i*>(&in.directory[0]);
+ __m128i* simd_out = reinterpret_cast<__m128i*>(&out->directory[0]);
+ const size_t simd_size =
+ (in.directory.size() * sizeof(in.directory[0])) / sizeof(simd_in[0]);
+ // in.directory has a size (in bytes) that is a multiple of 32. Since sizeof(__m128i)
+ // == 16, we can do two _mm_or_si128's in each iteration without checking array
+ // bounds.
+ for (size_t i = 0; i < simd_size; i += 2) {
+ _mm_storeu_si128(simd_out + i,
+ _mm_or_si128(_mm_loadu_si128(simd_out + i), _mm_loadu_si128(simd_in + i)));
+ _mm_storeu_si128(simd_out + i + 1, _mm_or_si128(_mm_loadu_si128(simd_out + i + 1),
+ _mm_loadu_si128(simd_in + i + 1)));
+ }
+ }
}
// The following three methods are derived from
@@ -187,14 +223,13 @@ int BloomFilter::MinLogSpace(const size_t ndv, const double fpp) {
const double m = -k * ndv / log(1 - pow(fpp, 1.0 / k));
// Handle case where ndv == 1 => ceil(log2(m/8)) < 0.
- return max(0, static_cast<int>(ceil(log2(m/8))));
+ return max(0, static_cast<int>(ceil(log2(m / 8))));
}
double BloomFilter::FalsePositiveProb(const size_t ndv, const int log_heap_space) {
- return pow(
- 1 - exp((-1.0 * static_cast<double>(BUCKET_WORDS) * static_cast<double>(ndv)) /
- static_cast<double>(1ull << (log_heap_space + 3))),
+ return pow(1 - exp((-1.0 * static_cast<double>(BUCKET_WORDS) * static_cast<double>(ndv))
+ / static_cast<double>(1ull << (log_heap_space + 3))),
BUCKET_WORDS);
}
-} // namespace impala
+} // namespace impala
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61fcb489/be/src/util/cpu-info.cc
----------------------------------------------------------------------
diff --git a/be/src/util/cpu-info.cc b/be/src/util/cpu-info.cc
index 532c9dd..6329ca8 100644
--- a/be/src/util/cpu-info.cc
+++ b/be/src/util/cpu-info.cc
@@ -77,6 +77,7 @@ static struct {
{ "sse4_1", CpuInfo::SSE4_1 },
{ "sse4_2", CpuInfo::SSE4_2 },
{ "popcnt", CpuInfo::POPCNT },
+ { "avx", CpuInfo::AVX },
{ "avx2", CpuInfo::AVX2 },
};
static const long num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61fcb489/be/src/util/cpu-info.h
----------------------------------------------------------------------
diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h
index cb577c2..868d2dd 100644
--- a/be/src/util/cpu-info.h
+++ b/be/src/util/cpu-info.h
@@ -36,7 +36,8 @@ class CpuInfo {
static const int64_t SSE4_1 = (1 << 2);
static const int64_t SSE4_2 = (1 << 3);
static const int64_t POPCNT = (1 << 4);
- static const int64_t AVX2 = (1 << 5);
+ static const int64_t AVX = (1 << 5);
+ static const int64_t AVX2 = (1 << 6);
/// Cache enums for L1 (data), L2 and L3
enum CacheLevel {