You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@orc.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2017/05/01 20:40:04 UTC

[jira] [Commented] (ORC-87) [C++] Handle missing timezone conversion for timestamp statistics

    [ https://issues.apache.org/jira/browse/ORC-87?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15991495#comment-15991495 ] 

ASF GitHub Bot commented on ORC-87:
-----------------------------------

Github user omalley commented on a diff in the pull request:

    https://github.com/apache/orc/pull/110#discussion_r114196797
  
    --- Diff: c++/src/Statistics.cc ---
    @@ -279,20 +277,55 @@ namespace orc {
       }
     
       TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
    -  (const proto::ColumnStatistics& pb, bool correctStats) {
    +  (const proto::ColumnStatistics& pb, const StatContext& statContext) {
         valueCount = pb.numberofvalues();
    -    if (!pb.has_timestampstatistics() || !correctStats) {
    +    if (!pb.has_timestampstatistics() || !statContext.correctStats) {
           _hasMinimum = false;
           _hasMaximum = false;
    +      _hasLowerBound = false;
    +      _hasUpperBound = false;
           minimum = 0;
           maximum = 0;
    +      lowerBound = 0;
    +      upperBound = 0;
         }else{
           const proto::TimestampStatistics& stats = pb.timestampstatistics();
    -      _hasMinimum = stats.has_minimum();
    -      _hasMaximum = stats.has_maximum();
    -
    -      minimum = stats.minimum();
    -      maximum = stats.maximum();
    +      _hasMinimum = stats.has_minimumutc() || (stats.has_minimum() && (statContext.writerTimezone != NULL));
    +      _hasMaximum = stats.has_maximumutc() || (stats.has_maximum() && (statContext.writerTimezone != NULL));
    +      _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
    +      _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
    +
    +      // Timestamp stats are stored in milliseconds
    +      if (stats.has_minimumutc()) {
    +        minimum = stats.minimumutc();
    +        lowerBound = minimum;
    +      } else if (statContext.writerTimezone) {
    +        int64_t writerTimeSec = stats.minimum() / 1000;
    +        // multiply the offset by 1000 to convert to millisecond
    +        minimum = stats.minimum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000;
    +        lowerBound = minimum;
    +      } else {
    +        minimum = 0;
    +        // subtract 1 day to handle unknown TZ
    +        lowerBound = stats.minimum() - (24 * 60 * 60 * 1000);
    +      }
    +
    +      // Timestamp stats are stored in milliseconds
    +      if (stats.has_maximumutc()) {
    +        maximum = stats.maximumutc();
    +        upperBound = maximum;
    +      } else if (statContext.writerTimezone) {
    +        int64_t writerTimeSec = stats.maximum() / 1000;
    +        // multiply the offset by 1000 to convert to millisecond
    +        maximum = stats.maximum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000;
    +        upperBound = maximum;
    +      } else {
    +        maximum = 0;
    +        // add 1 day to handle unknown TZ
    +        upperBound = stats.maximum() +  (SECONDS_PER_DAY * 1000);
    +      }
    +      // Add 1 millisecond to account for microsecond precision of values
    +      maximum += 1;
    --- End diff --
    
    I don't understand this and the unit tests pass without it.


> [C++] Handle missing timezone conversion for timestamp statistics
> -----------------------------------------------------------------
>
>                 Key: ORC-87
>                 URL: https://issues.apache.org/jira/browse/ORC-87
>             Project: ORC
>          Issue Type: Bug
>          Components: C++
>            Reporter: Deepak Majeti
>            Assignee: Deepak Majeti
>
> The recent release adds timezone to timestamp values but does not add the conversion for timestamp statistics.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)