You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/09/30 10:44:05 UTC
[arrow-rs] branch master updated: parquet: Avoid NaN check for non-floats (#798)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e8438bb parquet: Avoid NaN check for non-floats (#798)
e8438bb is described below
commit e8438bb54472a3f62044cd64ee68dd2e4ff59ba9
Author: Kornelijus Survila <ko...@gmail.com>
AuthorDate: Thu Sep 30 04:43:57 2021 -0600
parquet: Avoid NaN check for non-floats (#798)
The NaN check was especially expensive for `ByteArray` columns, potentially
taking as long as the rest of the encoding.
---
parquet/src/column/writer.rs | 34 +++++++++++++++++++---------------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/parquet/src/column/writer.rs b/parquet/src/column/writer.rs
index 0da9439..162941a 100644
--- a/parquet/src/column/writer.rs
+++ b/parquet/src/column/writer.rs
@@ -951,23 +951,27 @@ impl<T: DataType> ColumnWriterImpl<T> {
#[allow(clippy::eq_op)]
fn update_page_min_max(&mut self, val: &T::T) {
- // simple "isNaN" check that works for all types
- if val == val {
- if self
- .min_page_value
- .as_ref()
- .map_or(true, |min| self.compare_greater(min, val))
- {
- self.min_page_value = Some(val.clone());
- }
- if self
- .max_page_value
- .as_ref()
- .map_or(true, |max| self.compare_greater(val, max))
- {
- self.max_page_value = Some(val.clone());
+ if let Type::FLOAT | Type::DOUBLE = T::get_physical_type() {
+ // Skip NaN values
+ if val != val {
+ return;
}
}
+
+ if self
+ .min_page_value
+ .as_ref()
+ .map_or(true, |min| self.compare_greater(min, val))
+ {
+ self.min_page_value = Some(val.clone());
+ }
+ if self
+ .max_page_value
+ .as_ref()
+ .map_or(true, |max| self.compare_greater(val, max))
+ {
+ self.max_page_value = Some(val.clone());
+ }
}
fn update_column_min_max(&mut self) {