You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2022/04/04 17:39:43 UTC
[orc] branch branch-1.7 updated: ORC-1146: Float category missing check if the statistic sum is a finite value
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new 9a56fa578 ORC-1146: Float category missing check if the statistic sum is a finite value
9a56fa578 is described below
commit 9a56fa578aef30962ba0b0447c0d8911c9c321e1
Author: Guiyanakuang <gu...@gmail.com>
AuthorDate: Mon Apr 4 10:16:25 2022 -0700
ORC-1146: Float category missing check if the statistic sum is a finite value
### What changes were proposed in this pull request?
This PR extends the existing check that a column's statistics sum is finite, previously applied only to the DOUBLE category, so that it also covers the FLOAT category.
### Why are the changes needed?
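When NaN values are written to an ORC FLOAT column, the column statistics contain NaN and must not be used for predicate pushdown; previously only DOUBLE columns were checked for this.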
### How was this patch tested?
Added unit test.
Closes #1077
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit 93a350520f5f9f1582fbb3415d3a567e880c29da)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../java/org/apache/orc/impl/RecordReaderImpl.java | 3 ++-
.../src/test/org/apache/orc/TestVectorOrcFile.java | 31 +++++++++++++++++++++-
2 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 96ce3dd52..4d50e9ebb 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -650,7 +650,8 @@ public class RecordReaderImpl implements RecordReader {
" include ORC-517. Writer version: {}",
predicate.getColumnName(), writerVersion);
return TruthValue.YES_NO_NULL;
- } else if (category == TypeDescription.Category.DOUBLE) {
+ } else if (category == TypeDescription.Category.DOUBLE
+ || category == TypeDescription.Category.FLOAT) {
DoubleColumnStatistics dstas = (DoubleColumnStatistics) cs;
if (!Double.isFinite(dstas.getSum())) {
LOG.debug("Not using predication pushdown on {} because stats contain NaN values",
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 13fa6b8cf..a06dc1b4b 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -4301,7 +4301,8 @@ public class TestVectorOrcFile {
@MethodSource("data")
public void testPredicatePushdownWithNan(Version fileFormat) throws Exception {
TypeDescription schema = TypeDescription.createStruct()
- .addField("double1", TypeDescription.createDouble());
+ .addField("double1", TypeDescription.createDouble())
+ .addField("float1", TypeDescription.createFloat());
Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf)
@@ -4315,14 +4316,18 @@ public class TestVectorOrcFile {
batch.ensureSize(3500);
batch.size = 3500;
batch.cols[0].noNulls = true;
+ batch.cols[1].noNulls = true;
DoubleColumnVector dbcol = ((DoubleColumnVector) batch.cols[0]);
+ DoubleColumnVector fcol = ((DoubleColumnVector) batch.cols[1]);
// first row NaN (resulting to min/max and sum columnStats of stride to be NaN)
// NaN in the middle of a stride causes Sum of last stride to be NaN
dbcol.vector[0] = Double.NaN;
+ fcol.vector[0] = Double.NaN;
for (int i=1; i < 3500; ++i) {
dbcol.vector[i] = i == 3200 ? Double.NaN : i;
+ fcol.vector[i] = i == 3200 ? Double.NaN : i;
}
writer.addRowBatch(batch);
writer.close();
@@ -4332,6 +4337,7 @@ public class TestVectorOrcFile {
assertEquals(3500, reader.getNumberOfRows());
// Only the first stride matches the predicate, just need to make sure NaN stats are ignored
+ // Test double category push down
SearchArgument sarg = SearchArgumentFactory.newBuilder()
.startAnd()
.lessThan("double1", PredicateLeaf.Type.FLOAT, 100d)
@@ -4353,6 +4359,29 @@ public class TestVectorOrcFile {
rows.nextBatch(batch);
assertEquals(0, batch.size);
+
+ // Test float category push down
+ sarg = SearchArgumentFactory.newBuilder()
+ .startAnd()
+ .lessThan("float1", PredicateLeaf.Type.FLOAT, 100d)
+ .end()
+ .build();
+
+ rows = reader.rows(reader.options()
+ .range(0L, Long.MAX_VALUE)
+ .searchArgument(sarg, new String[]{"float1"}));
+ batch = reader.getSchema().createRowBatch(3500);
+
+ rows.nextBatch(batch);
+ // First stride should be read as NaN sum is ignored
+ assertEquals(1000, batch.size);
+
+ rows.nextBatch(batch);
+ // Last stride should be read as NaN sum is ignored
+ assertEquals(500, batch.size);
+
+ rows.nextBatch(batch);
+ assertEquals(0, batch.size);
}
/**