You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by ga...@apache.org on 2021/09/30 02:35:34 UTC
[orc] branch main updated: ORC-1008: Fix overflow detection in the
IntegerColumnStatistics
This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new c69e1fc ORC-1008: Fix overflow detection in the IntegerColumnStatistics
c69e1fc is described below
commit c69e1fcd91e4c0a65052618b0161e3ab1f3d5443
Author: Guiyanakaung <gu...@gmail.com>
AuthorDate: Thu Sep 30 10:35:30 2021 +0800
ORC-1008: Fix overflow detection in the IntegerColumnStatistics
- Fix overflow detection code for C++ int64_t / java long in the IntegerColumnStatistics.
- Merging is also done using the same algorithm for overflow detection, and add test cases.
- Compatibility with different compilers for overflow detection.
- Use cmakedefine check builtin overflow function.
This fixes #917
---
c++/src/Adaptor.hh.in | 35 ++++++++++++++++++++++
c++/src/CMakeLists.txt | 8 +++++
c++/src/Statistics.cc | 12 --------
c++/src/Statistics.hh | 26 ++++++++++++----
c++/test/TestColumnStatistics.cc | 8 +++++
.../org/apache/orc/impl/ColumnStatisticsImpl.java | 19 +++++++-----
.../test/org/apache/orc/TestColumnStatistics.java | 11 +++++++
7 files changed, 94 insertions(+), 25 deletions(-)
diff --git a/c++/src/Adaptor.hh.in b/c++/src/Adaptor.hh.in
index a7795be..cd32b16 100644
--- a/c++/src/Adaptor.hh.in
+++ b/c++/src/Adaptor.hh.in
@@ -31,6 +31,7 @@
#cmakedefine HAS_POST_2038
#cmakedefine HAS_STD_ISNAN
#cmakedefine HAS_STD_MUTEX
+#cmakedefine HAS_BUILTIN_OVERFLOW_CHECK
#cmakedefine NEEDS_REDUNDANT_MOVE
#cmakedefine NEEDS_Z_PREFIX
@@ -173,6 +174,40 @@ namespace orc {
std::string to_string(int64_t val);
}
+#ifdef HAS_BUILTIN_OVERFLOW_CHECK
+ #define multiplyExact !__builtin_mul_overflow
+ #define addExact !__builtin_add_overflow
+#else
+namespace orc {
+ /**
+ * Compute value * repetitions, return false if overflow, return true otherwise
+ * and save the result at the address pointed to by result
+ * imitates the jdk Math.multiplyExact implementation
+ * but this method makes the assumption that repetitions > 1
+ */
+ static bool multiplyExact(int64_t value, int64_t repetitions, int64_t* result) {
+ int64_t r = value * repetitions;
+ if (((value < 0 ? -value : value) | repetitions) >> 31 != 0 && r / repetitions != value) {
+ return false;
+ }
+ *result = r;
+ return true;
+ }
+
+ /**
+ * imitates the jdk Math.addExact implementation
+ */
+ static bool addExact(int64_t sum, int64_t increment, int64_t* result) {
+ int64_t r = sum + increment;
+ if (((sum ^ r) & (increment ^ r)) < 0) {
+ return false;
+ }
+ *result = r;
+ return true;
+ }
+}
+#endif
+
#ifndef HAS_CONSTEXPR
#define constexpr const
#endif
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 772f4c6..ccb48da 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -43,6 +43,14 @@ CHECK_CXX_SOURCE_COMPILES("
)
CHECK_CXX_SOURCE_COMPILES("
+ int main(){
+ int a;
+ return __builtin_add_overflow(1, 2, &a);
+ }"
+ HAS_BUILTIN_OVERFLOW_CHECK
+)
+
+CHECK_CXX_SOURCE_COMPILES("
#include<stdint.h>
#include<stdio.h>
int main(int,char*[]){
diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc
index 53e0da2..ccc54c2 100644
--- a/c++/src/Statistics.cc
+++ b/c++/src/Statistics.cc
@@ -177,18 +177,6 @@ namespace orc {
// PASS
}
- void IntegerColumnStatisticsImpl::update(int64_t value, int repetitions) {
- _stats.updateMinMax(value);
-
- if (_stats.hasSum()) {
- bool wasPositive = _stats.getSum() >= 0;
- _stats.setSum(value * repetitions + _stats.getSum());
- if ((value >= 0) == wasPositive) {
- _stats.setHasSum((_stats.getSum() >= 0) == wasPositive);
- }
- }
- }
-
StringColumnStatisticsImpl::~StringColumnStatisticsImpl() {
// PASS
}
diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh
index f056aba..e281bdb 100644
--- a/c++/src/Statistics.hh
+++ b/c++/src/Statistics.hh
@@ -964,7 +964,23 @@ namespace orc {
_stats.setSum(sum);
}
- void update(int64_t value, int repetitions);
+ void update(int64_t value, int repetitions) {
+ _stats.updateMinMax(value);
+
+ if (_stats.hasSum()) {
+ if (repetitions > 1) {
+ _stats.setHasSum(multiplyExact(value, repetitions, &value));
+ }
+
+ if (_stats.hasSum()) {
+ _stats.setHasSum(addExact(_stats.getSum(), value, &value));
+
+ if (_stats.hasSum()) {
+ _stats.setSum(value);
+ }
+ }
+ }
+ }
void merge(const MutableColumnStatistics& other) override {
const IntegerColumnStatisticsImpl& intStats =
@@ -975,10 +991,10 @@ namespace orc {
// update sum and check overflow
_stats.setHasSum(_stats.hasSum() && intStats.hasSum());
if (_stats.hasSum()) {
- bool wasPositive = _stats.getSum() >= 0;
- _stats.setSum(_stats.getSum() + intStats.getSum());
- if ((intStats.getSum() >= 0) == wasPositive) {
- _stats.setHasSum((_stats.getSum() >= 0) == wasPositive);
+ int64_t value;
+ _stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value));
+ if (_stats.hasSum()) {
+ _stats.setSum(value);
}
}
}
diff --git a/c++/test/TestColumnStatistics.cc b/c++/test/TestColumnStatistics.cc
index 77f3e2d..11338ad 100644
--- a/c++/test/TestColumnStatistics.cc
+++ b/c++/test/TestColumnStatistics.cc
@@ -106,6 +106,14 @@ namespace orc {
EXPECT_EQ(std::numeric_limits<int64_t>::min() + 500, other->getSum());
intStats->merge(*other);
EXPECT_FALSE(intStats->hasSum());
+
+ std::unique_ptr<IntegerColumnStatisticsImpl> intStats2(
+ new IntegerColumnStatisticsImpl());
+
+ intStats2->update(1, 1);
+ EXPECT_TRUE(intStats2->hasSum());
+ intStats2->update(std::numeric_limits<int64_t>::max(), 3);
+ EXPECT_FALSE(intStats2->hasSum());
}
TEST(ColumnStatistics, doubleColumnStatistics) {
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 131a658..5573d4b 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -370,10 +370,13 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
maximum = value;
}
if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += value * repetitions;
- if ((value >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
+ try {
+ long increment = repetitions > 1
+ ? Math.multiplyExact(value, repetitions)
+ : value;
+ sum = Math.addExact(sum, increment);
+ } catch (ArithmeticException e) {
+ overflow = true;
}
}
}
@@ -397,10 +400,10 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
overflow |= otherInt.overflow;
if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += otherInt.sum;
- if ((otherInt.sum >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
+ try {
+ sum = Math.addExact(sum, otherInt.sum);
+ } catch (ArithmeticException e) {
+ overflow = true;
}
}
} else {
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index d3372b8..d66e5ee 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -42,6 +42,7 @@ import java.text.SimpleDateFormat;
import java.util.TimeZone;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -51,6 +52,16 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestColumnStatistics {
@Test
+ public void testLongSumOverflow() {
+ TypeDescription schema = TypeDescription.createInt();
+ ColumnStatisticsImpl stats = ColumnStatisticsImpl.create(schema);
+ stats.updateInteger(1, 1);
+ assertTrue(((IntegerColumnStatistics) stats).isSumDefined());
+ stats.updateInteger(Long.MAX_VALUE, 3);
+ assertFalse(((IntegerColumnStatistics) stats).isSumDefined());
+ }
+
+ @Test
public void testLongMerge() throws Exception {
TypeDescription schema = TypeDescription.createInt();