You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@orc.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2017/05/01 20:40:04 UTC
[jira] [Commented] (ORC-87) [C++] Handle missing timezone
conversion for timestamp statistics
[ https://issues.apache.org/jira/browse/ORC-87?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15991495#comment-15991495 ]
ASF GitHub Bot commented on ORC-87:
-----------------------------------
Github user omalley commented on a diff in the pull request:
https://github.com/apache/orc/pull/110#discussion_r114196797
--- Diff: c++/src/Statistics.cc ---
@@ -279,20 +277,55 @@ namespace orc {
}
TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, bool correctStats) {
+ (const proto::ColumnStatistics& pb, const StatContext& statContext) {
valueCount = pb.numberofvalues();
- if (!pb.has_timestampstatistics() || !correctStats) {
+ if (!pb.has_timestampstatistics() || !statContext.correctStats) {
_hasMinimum = false;
_hasMaximum = false;
+ _hasLowerBound = false;
+ _hasUpperBound = false;
minimum = 0;
maximum = 0;
+ lowerBound = 0;
+ upperBound = 0;
}else{
const proto::TimestampStatistics& stats = pb.timestampstatistics();
- _hasMinimum = stats.has_minimum();
- _hasMaximum = stats.has_maximum();
-
- minimum = stats.minimum();
- maximum = stats.maximum();
+ _hasMinimum = stats.has_minimumutc() || (stats.has_minimum() && (statContext.writerTimezone != NULL));
+ _hasMaximum = stats.has_maximumutc() || (stats.has_maximum() && (statContext.writerTimezone != NULL));
+ _hasLowerBound = stats.has_minimumutc() || stats.has_minimum();
+ _hasUpperBound = stats.has_maximumutc() || stats.has_maximum();
+
+ // Timestamp stats are stored in milliseconds
+ if (stats.has_minimumutc()) {
+ minimum = stats.minimumutc();
+ lowerBound = minimum;
+ } else if (statContext.writerTimezone) {
+ int64_t writerTimeSec = stats.minimum() / 1000;
+ // multiply the offset by 1000 to convert to millisecond
+ minimum = stats.minimum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000;
+ lowerBound = minimum;
+ } else {
+ minimum = 0;
+ // subtract 1 day to handle unknown TZ
+ lowerBound = stats.minimum() - (24 * 60 * 60 * 1000);
+ }
+
+ // Timestamp stats are stored in milliseconds
+ if (stats.has_maximumutc()) {
+ maximum = stats.maximumutc();
+ upperBound = maximum;
+ } else if (statContext.writerTimezone) {
+ int64_t writerTimeSec = stats.maximum() / 1000;
+ // multiply the offset by 1000 to convert to millisecond
+ maximum = stats.maximum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000;
+ upperBound = maximum;
+ } else {
+ maximum = 0;
+ // add 1 day to handle unknown TZ
+ upperBound = stats.maximum() + (SECONDS_PER_DAY * 1000);
+ }
+ // Add 1 millisecond to account for microsecond precision of values
+ maximum += 1;
--- End diff --
I don't understand this and the unit tests pass without it.
> [C++] Handle missing timezone conversion for timestamp statistics
> -----------------------------------------------------------------
>
> Key: ORC-87
> URL: https://issues.apache.org/jira/browse/ORC-87
> Project: ORC
> Issue Type: Bug
> Components: C++
> Reporter: Deepak Majeti
> Assignee: Deepak Majeti
>
> The recent release adds timezone to timestamp values but does not add the conversion for timestamp statistics.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)