You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@orc.apache.org by GitBox <gi...@apache.org> on 2020/10/27 09:06:36 UTC

[GitHub] [orc] wgtmac commented on a change in pull request #543: ORC-663: [C++] Support timestamp statistics with nanosecond

wgtmac commented on a change in pull request #543:
URL: https://github.com/apache/orc/pull/543#discussion_r512518148



##########
File path: c++/src/sargs/PredicateLeaf.cc
##########
@@ -510,6 +510,7 @@ namespace orc {
         break;
       }
       case PredicateDataType::STRING: {
+        ///FIXME: check lowerBound and upperBound as well

Review comment:
       yes

##########
File path: c++/test/TestPredicateLeaf.cc
##########
@@ -145,6 +145,22 @@ namespace orc {
     return colStats;
   }
 
+  static proto::ColumnStatistics createTimestampStats(
+                                    int64_t minSecond, int32_t minNano,

Review comment:
       I think we have: https://orc.apache.org/develop/coding/

##########
File path: c++/include/orc/Statistics.hh
##########
@@ -352,7 +352,17 @@ namespace orc {
      */
     virtual int64_t getUpperBound() const = 0;
 
+    /**
+     * Get the last 6 digits of nanosecond of minimum timestamp.
+     * @return last 6 digits of nanosecond of minimum timestamp.
+     */
+    virtual int32_t getMinimumNano() const = 0;
 
+    /**
+     * Get the last 6 digits of nanosecond of maximum timestamp.
+     * @return last 6 digits of nanosecond of maximum timestamp.
+     */
+    virtual int32_t getMaximumNano() const = 0;

Review comment:
       done

##########
File path: c++/include/orc/sargs/Literal.hh
##########
@@ -63,10 +90,15 @@ namespace orc {
     Literal(bool val);
 
     /**
-     * Create a literal of Timestamp or DATE type

Review comment:
       This is fine. Predicate pushdown in C++ is not complete yet and I have another patch waiting for review.

##########
File path: c++/src/Statistics.hh
##########
@@ -1298,9 +1356,17 @@ namespace orc {
       if (_stats.hasMinimum()) {
         tsStats->set_minimumutc(_stats.getMinimum());
         tsStats->set_maximumutc(_stats.getMaximum());
+        if (_minimumNano != DEFAULT_MIN_NANOS) {
+          tsStats->set_minimumnanos(_minimumNano + 1);

Review comment:
       just to be consistent with java side.

##########
File path: c++/include/orc/Statistics.hh
##########
@@ -352,7 +352,17 @@ namespace orc {
      */
     virtual int64_t getUpperBound() const = 0;
 
+    /**
+     * Get the last 6 digits of nanosecond of minimum timestamp.
+     * @return last 6 digits of nanosecond of minimum timestamp.
+     */
+    virtual int32_t getMinimumNano() const = 0;

Review comment:
       done

##########
File path: c++/src/Statistics.hh
##########
@@ -1214,6 +1214,10 @@ namespace orc {
     bool _hasUpperBound;
     int64_t _lowerBound;
     int64_t _upperBound;
+    int32_t _minimumNano;  // last 6 digits of nanosecond of minimum timestamp

Review comment:
       done

##########
File path: c++/src/Statistics.cc
##########
@@ -327,6 +329,10 @@ namespace orc {
                 (stats.has_maximum() && (statContext.writerTimezone != nullptr)));
       _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
       _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
+      _minimumNano = stats.has_minimumnanos() ?
+                     stats.minimumnanos() - 1 : DEFAULT_MIN_NANOS;

Review comment:
       sure

##########
File path: c++/src/Statistics.cc
##########
@@ -317,6 +317,8 @@ namespace orc {
       _stats.setMaximum(0);
       _lowerBound = 0;
       _upperBound = 0;
+      _minimumNano = DEFAULT_MIN_NANOS;

Review comment:
       done

##########
File path: c++/src/Statistics.hh
##########
@@ -1379,6 +1445,22 @@ namespace orc {
         throw ParseError("UpperBound is not defined.");
       }
     }
+
+    int32_t getMinimumNano() const override {

Review comment:
       done




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org