Posted to commits@hive.apache.org by br...@apache.org on 2014/06/20 04:55:25 UTC

svn commit: r1604077 - in /hive/trunk: ./ data/files/ ql/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/ ql/src/java/org/apache/h...

Author: brock
Date: Fri Jun 20 02:55:24 2014
New Revision: 1604077

URL: http://svn.apache.org/r1604077
Log:
HIVE-6394 - Implement Timestamp in ParquetSerde (Szehon via Brock)
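
In short: Hive timestamp columns are now mapped to Parquet's INT96 physical
type, carrying a Julian day number plus the nanoseconds within that day. The
new NanoTimeUtils class performs the conversion (using Jodd's JDateTime for
the Julian-day arithmetic), HiveSchemaConverter maps the Hive timestamp type
to INT96, ETypeConverter decodes it on the read path, and DataWritableWriter
encodes it on the write path.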

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
Removed:
    hive/trunk/ql/src/test/queries/clientnegative/parquet_timestamp.q
Modified:
    hive/trunk/data/files/parquet_types.txt
    hive/trunk/pom.xml
    hive/trunk/ql/pom.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
    hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
    hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out

Modified: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (original)
+++ hive/trunk/data/files/parquet_types.txt Fri Jun 20 02:55:24 2014
@@ -1,21 +1,21 @@
-100|1|1|1.0|0.0|abc
-101|2|2|1.1|0.3|def
-102|3|3|1.2|0.6|ghi
-103|1|4|1.3|0.9|jkl
-104|2|5|1.4|1.2|mno
-105|3|1|1.0|1.5|pqr
-106|1|2|1.1|1.8|stu
-107|2|3|1.2|2.1|vwx
-108|3|4|1.3|2.4|yza
-109|1|5|1.4|2.7|bcd
-110|2|1|1.0|3.0|efg
-111|3|2|1.1|3.3|hij
-112|1|3|1.2|3.6|klm
-113|2|4|1.3|3.9|nop
-114|3|5|1.4|4.2|qrs
-115|1|1|1.0|4.5|tuv
-116|2|2|1.1|4.8|wxy
-117|3|3|1.2|5.1|zab
-118|1|4|1.3|5.4|cde
-119|2|5|1.4|5.7|fgh
-120|3|1|1.0|6.0|ijk
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151
+115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212

Modified: hive/trunk/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/pom.xml?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/pom.xml (original)
+++ hive/trunk/pom.xml Fri Jun 20 02:55:24 2014
@@ -123,6 +123,7 @@
     <jersey.version>1.14</jersey.version>
     <jline.version>0.9.94</jline.version>
     <jms.version>1.1</jms.version>
+    <jodd.version>3.5.2</jodd.version>
     <json.version>20090211</json.version>
     <junit.version>4.10</junit.version>
     <kryo.version>2.22</kryo.version>
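
The new jodd-core dependency exists solely for its Julian-day arithmetic,
used by the NanoTimeUtils class added further down. A minimal sketch of the
two JDateTime conversions the patch relies on (the class name here is
illustrative; the 2440000 anchor value is the one the new unit test asserts):

    import jodd.datetime.JDateTime;

    public class JoddJulianDaySketch {
      public static void main(String[] args) {
        // Calendar date -> Julian day number; May 23, 1968 is Julian day 2440000.
        JDateTime jdt = new JDateTime(1968, 5, 23);
        int julianDay = jdt.getJulianDayNumber();
        System.out.println(julianDay); // 2440000

        // Julian day number -> calendar date, the inverse used on the read path.
        JDateTime back = new JDateTime((double) julianDay);
        System.out.printf("%d-%02d-%02d%n", back.getYear(), back.getMonth(), back.getDay());
      }
    }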

Modified: hive/trunk/ql/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/pom.xml?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/pom.xml (original)
+++ hive/trunk/ql/pom.xml Fri Jun 20 02:55:24 2014
@@ -167,6 +167,11 @@
       <version>${jackson.version}</version>
     </dependency>
     <dependency>
+      <groupId>org.jodd</groupId>
+      <artifactId>jodd-core</artifactId>
+      <version>${jodd.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.codehaus.jackson</groupId>
       <artifactId>jackson-mapper-asl</artifactId>
       <version>${jackson.version}</version>
@@ -512,6 +517,7 @@
                   <include>com.twitter:parquet-hadoop-bundle</include>
                   <include>org.apache.thrift:libthrift</include>
                   <include>commons-lang:commons-lang</include>
+                  <include>org.jodd:jodd-core</include>
                   <include>org.json:json</include>
                   <include>org.apache.avro:avro</include>
                   <include>org.apache.avro:avro-mapred</include>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Fri Jun 20 02:55:24 2014
@@ -14,20 +14,23 @@
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
 import java.math.BigDecimal;
-
+import java.sql.Timestamp;
 import java.util.ArrayList;
 
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+
 import parquet.column.Dictionary;
+import parquet.example.data.simple.NanoTime;
 import parquet.io.api.Binary;
 import parquet.io.api.Converter;
 import parquet.io.api.PrimitiveConverter;
@@ -43,6 +46,7 @@ public enum ETypeConverter {
 
   EDOUBLE_CONVERTER(Double.TYPE) {
     @Override
+
     Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
       return new PrimitiveConverter() {
         @Override
@@ -128,6 +132,19 @@ public enum ETypeConverter {
         }
       };
     }
+  },
+  ETIMESTAMP_CONVERTER(TimestampWritable.class) {
+    @Override
+    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+      return new BinaryConverter<TimestampWritable>(type, parent, index) {
+        @Override
+        protected TimestampWritable convert(Binary binary) {
+          NanoTime nt = NanoTime.fromBinary(binary);
+          Timestamp ts = NanoTimeUtils.getTimestamp(nt);
+          return new TimestampWritable(ts);
+        }
+      };
+    }
   };
 
   final Class<?> _type;
@@ -143,6 +160,10 @@ public enum ETypeConverter {
   abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
 
   public static Converter getNewConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+    if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
+      //TODO: clean up once parquet supports a Timestamp type annotation.
+      return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
+    }
     if (OriginalType.DECIMAL == type.getOriginalType()) {
       return EDECIMAL_CONVERTER.getConverter(type, index, parent);
     } else if (OriginalType.UTF8 == type.getOriginalType()) {
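
For reference, the INT96 value dispatched above is Parquet's NanoTime:
twelve bytes, an 8-byte little-endian count of nanoseconds within the day
followed by a 4-byte little-endian Julian day number. A standalone decode
sketch, assuming that layout (it mirrors what NanoTime.fromBinary does; the
class name is illustrative and this is not code from the patch):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    public class Int96TimestampSketch {
      // Decodes a 12-byte INT96 timestamp into { julianDay, nanosOfDay }.
      static long[] decode(byte[] int96) {
        ByteBuffer buf = ByteBuffer.wrap(int96).order(ByteOrder.LITTLE_ENDIAN);
        long nanosOfDay = buf.getLong(); // first 8 bytes
        long julianDay = buf.getInt();   // last 4 bytes
        return new long[] { julianDay, nanosOfDay };
      }

      public static void main(String[] args) {
        // Pack the two components the same way, then decode them back.
        ByteBuffer buf = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN);
        buf.putLong(3661000000001L); // 01:01:01.000000001 as nanos of day
        buf.putInt(2440000);         // May 23, 1968 as a Julian day
        long[] parts = decode(buf.array());
        System.out.println("julianDay=" + parts[0] + " nanosOfDay=" + parts[1]);
      }
    }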

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Fri Jun 20 02:55:24 2014
@@ -78,7 +78,7 @@ public class HiveSchemaConverter {
         // TODO : binaryTypeInfo is a byte array. Need to map it
         throw new UnsupportedOperationException("Binary type not implemented");
       } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
-        throw new UnsupportedOperationException("Timestamp type not implemented");
+        return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name);
       } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
         throw new UnsupportedOperationException("Void type not implemented");
       } else if (typeInfo instanceof DecimalTypeInfo) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Fri Jun 20 02:55:24 2014
@@ -21,6 +21,7 @@ import java.util.List;
 import org.apache.hadoop.hive.ql.io.parquet.serde.primitive.ParquetPrimitiveInspectorFactory;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -98,7 +99,7 @@ public class ArrayWritableObjectInspecto
     } else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
       return ParquetPrimitiveInspectorFactory.parquetShortInspector;
     } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
-      throw new UnsupportedOperationException("Parquet does not support timestamp. See HIVE-6384");
+      return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
       throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384");
     } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Fri Jun 20 02:55:24 2014
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.Ser
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -276,6 +278,8 @@ public class ParquetHiveSerDe extends Ab
       }
       System.arraycopy(src, 0, tgt, bytes - src.length, src.length); // Padding leading zeroes/ones.
       return new BytesWritable(tgt);
+    case TIMESTAMP:
+      return new TimestampWritable(((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj));
     default:
       throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
     }

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java?rev=1604077&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java Fri Jun 20 02:55:24 2014
@@ -0,0 +1,87 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.utils;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.TimeZone;
+
+import jodd.datetime.JDateTime;
+import jodd.datetime.TimeUtil;
+import parquet.example.data.simple.NanoTime;
+
+/**
+ * Utilities for converting from java.sql.Timestamp to parquet timestamp.
+ * This utilizes the Jodd library.
+ */
+public class NanoTimeUtils {
+   static final long NANOS_PER_SECOND = 1000000000;
+   static final long SECONDS_PER_MINUTE = 60;
+   static final long MINUTES_PER_HOUR = 60;
+
+   private static final ThreadLocal<Calendar> parquetTsCalendar = new ThreadLocal<Calendar>();
+
+   private static Calendar getCalendar() {
+     //Calendar.getInstance calculates the current time needlessly, so cache an instance.
+     if (parquetTsCalendar.get() == null) {
+       parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
+     }
+     return parquetTsCalendar.get();
+   }
+
+   public static NanoTime getNanoTime(Timestamp ts) {
+
+     Calendar calendar = getCalendar();
+     calendar.setTime(ts);
+     JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
+       calendar.get(Calendar.MONTH) + 1,  //java.util.Calendar months are 0-based; JDateTime expects 1-based.
+       calendar.get(Calendar.DAY_OF_MONTH));
+     int days = jDateTime.getJulianDayNumber();
+
+     long hour = calendar.get(Calendar.HOUR_OF_DAY);
+     long minute = calendar.get(Calendar.MINUTE);
+     long second = calendar.get(Calendar.SECOND);
+     long nanos = ts.getNanos();
+     long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute +
+         NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour;
+     return new NanoTime(days, nanosOfDay);
+   }
+
+   public static Timestamp getTimestamp(NanoTime nt) {
+     int julianDay = nt.getJulianDay();
+     long nanosOfDay = nt.getTimeOfDayNanos();
+
+     JDateTime jDateTime = new JDateTime((double) julianDay);
+     Calendar calendar = getCalendar();
+     calendar.set(Calendar.YEAR, jDateTime.getYear());
+     calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //JDateTime months are 1-based; java.util.Calendar months are 0-based.
+     calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
+
+     long remainder = nanosOfDay;
+     int hour = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR));
+     remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR);
+     int minutes = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE));
+     remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE);
+     int seconds = (int) (remainder / (NANOS_PER_SECOND));
+     long nanos = remainder % NANOS_PER_SECOND;
+
+     calendar.set(Calendar.HOUR_OF_DAY, hour);
+     calendar.set(Calendar.MINUTE, minutes);
+     calendar.set(Calendar.SECOND, seconds);
+     Timestamp ts = new Timestamp(calendar.getTimeInMillis());
+     ts.setNanos((int) nanos);
+     return ts;
+   }
+}
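
A round-trip through the new utility, mirroring what the read and write
paths do (a usage sketch, assuming NanoTimeUtils and parquet's NanoTime
classes are on the classpath; the class name is illustrative):

    import java.sql.Timestamp;

    import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
    import parquet.example.data.simple.NanoTime;

    public class NanoTimeRoundTrip {
      public static void main(String[] args) {
        Timestamp ts = Timestamp.valueOf("2011-01-01 01:01:01.111111111");
        NanoTime nt = NanoTimeUtils.getNanoTime(ts);     // write-path conversion
        Timestamp back = NanoTimeUtils.getTimestamp(nt); // read-path conversion
        System.out.println(nt.getJulianDay() + " / " + nt.getTimeOfDayNanos());
        System.out.println(back); // 2011-01-01 01:01:01.111111111
      }
    }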

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Fri Jun 20 02:55:24 2014
@@ -13,10 +13,14 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.write;
 
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
@@ -25,6 +29,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Writable;
 
+import parquet.example.data.simple.NanoTime;
 import parquet.io.ParquetEncodingException;
 import parquet.io.api.Binary;
 import parquet.io.api.RecordConsumer;
@@ -149,6 +154,10 @@ public class DataWritableWriter {
       throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
     } else if (value instanceof BytesWritable) {
       recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes())));
+    } else if (value instanceof TimestampWritable) {
+      Timestamp ts = ((TimestampWritable) value).getTimestamp();
+      NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+      nt.writeValue(recordConsumer);
     } else {
       throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
     }

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java?rev=1604077&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java Fri Jun 20 02:55:24 2014
@@ -0,0 +1,201 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.serde;
+
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+
+import parquet.example.data.simple.NanoTime;
+
+/**
+ * Tests util-libraries used for parquet-timestamp.
+ */
+public class TestParquetTimestampUtils extends TestCase {
+
+  public void testJulianDay() {
+    //check if May 23, 1968 is Julian Day 2440000
+    Calendar cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR,  1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    cal.set(Calendar.HOUR_OF_DAY, 0);
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+    Timestamp ts = new Timestamp(cal.getTimeInMillis());
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getJulianDay(), 2440000);
+
+    Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+    Assert.assertEquals(tsFetched, ts);
+
+    //check that there are 30 Julian days between Jan 1, 2005 and Jan 31, 2005.
+    Calendar cal1 = Calendar.getInstance();
+    cal1.set(Calendar.YEAR,  2005);
+    cal1.set(Calendar.MONTH, Calendar.JANUARY);
+    cal1.set(Calendar.DAY_OF_MONTH, 1);
+    cal1.set(Calendar.HOUR_OF_DAY, 0);
+    cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+    Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
+    NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1);
+
+    Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1);
+    Assert.assertEquals(ts1Fetched, ts1);
+
+    Calendar cal2 = Calendar.getInstance();
+    cal2.set(Calendar.YEAR,  2005);
+    cal2.set(Calendar.MONTH, Calendar.JANUARY);
+    cal2.set(Calendar.DAY_OF_MONTH, 31);
+    cal2.set(Calendar.HOUR_OF_DAY, 0);
+    cal2.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+    Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
+    NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2);
+
+    Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2);
+    Assert.assertEquals(ts2Fetched, ts2);
+    Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
+  }
+
+  public void testNanos() {
+    //case 1: 01:01:01.000000001
+    Calendar cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR,  1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    cal.set(Calendar.HOUR_OF_DAY, 1);
+    cal.set(Calendar.MINUTE, 1);
+    cal.set(Calendar.SECOND, 1);
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+    Timestamp ts = new Timestamp(cal.getTimeInMillis());
+    ts.setNanos(1);
+
+    //(1*60*60 + 1*60 + 1) * 1e9 + 1
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
+
+    //case 2: 23:59:59.999999999
+    cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR,  1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    cal.set(Calendar.HOUR_OF_DAY, 23);
+    cal.set(Calendar.MINUTE, 59);
+    cal.set(Calendar.SECOND, 59);
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+    ts = new Timestamp(cal.getTimeInMillis());
+    ts.setNanos(999999999);
+
+    //(23*60*60 + 59*60 + 59) * 1e9 + 999999999
+    nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L);
+
+    //case 3: verify the difference.
+    Calendar cal2 = Calendar.getInstance();
+    cal2.set(Calendar.YEAR,  1968);
+    cal2.set(Calendar.MONTH, Calendar.MAY);
+    cal2.set(Calendar.DAY_OF_MONTH, 23);
+    cal2.set(Calendar.HOUR_OF_DAY, 0);
+    cal2.set(Calendar.MINUTE, 10);
+    cal2.set(Calendar.SECOND, 0);
+    cal2.setTimeZone(TimeZone.getTimeZone("GMT"));
+    Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
+    ts2.setNanos(10);
+
+    Calendar cal1 = Calendar.getInstance();
+    cal1.set(Calendar.YEAR,  1968);
+    cal1.set(Calendar.MONTH, Calendar.MAY);
+    cal1.set(Calendar.DAY_OF_MONTH, 23);
+    cal1.set(Calendar.HOUR_OF_DAY, 0);
+    cal1.set(Calendar.MINUTE, 0);
+    cal1.set(Calendar.SECOND, 0);
+    cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+    Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
+    ts1.setNanos(1);
+
+    NanoTime n2 = NanoTimeUtils.getNanoTime(ts2);
+    NanoTime n1 = NanoTimeUtils.getNanoTime(ts1);
+
+    Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L);
+  }
+
+  public void testTimezone() {
+    Calendar cal = Calendar.getInstance();
+    cal.set(Calendar.YEAR,  1968);
+    cal.set(Calendar.MONTH, Calendar.MAY);
+    cal.set(Calendar.DAY_OF_MONTH, 23);
+    if ((TimeZone.getTimeZone("US/Pacific").inDaylightTime(new Date()))) {
+      cal.set(Calendar.HOUR_OF_DAY, 18);
+    } else {
+      cal.set(Calendar.HOUR_OF_DAY, 17);
+    }
+    cal.set(Calendar.MINUTE, 1);
+    cal.set(Calendar.SECOND, 1);
+    cal.setTimeZone(TimeZone.getTimeZone("US/Pacific"));
+    Timestamp ts = new Timestamp(cal.getTimeInMillis());
+    ts.setNanos(1);
+
+    //18:00 PDT = 01:00 GMT the next day (during daylight saving time)
+    //17:00 PST = 01:00 GMT the next day (otherwise)
+    //(1*60*60 + 1*60 + 1) * 1e9 + 1
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
+
+    //in both cases, this will be the next day in GMT
+    Assert.assertEquals(nt.getJulianDay(), 2440001);
+  }
+
+  public void testValues() {
+    //exercise a broad range of timestamps close to the present.
+    verifyTsString("2011-01-01 01:01:01.111111111");
+    verifyTsString("2012-02-02 02:02:02.222222222");
+    verifyTsString("2013-03-03 03:03:03.333333333");
+    verifyTsString("2014-04-04 04:04:04.444444444");
+    verifyTsString("2015-05-05 05:05:05.555555555");
+    verifyTsString("2016-06-06 06:06:06.666666666");
+    verifyTsString("2017-07-07 07:07:07.777777777");
+    verifyTsString("2018-08-08 08:08:08.888888888");
+    verifyTsString("2019-09-09 09:09:09.999999999");
+    verifyTsString("2020-10-10 10:10:10.101010101");
+    verifyTsString("2021-11-11 11:11:11.111111111");
+    verifyTsString("2022-12-12 12:12:12.121212121");
+    verifyTsString("2023-01-02 13:13:13.131313131");
+    verifyTsString("2024-02-02 14:14:14.141414141");
+    verifyTsString("2025-03-03 15:15:15.151515151");
+    verifyTsString("2026-04-04 16:16:16.161616161");
+    verifyTsString("2027-05-05 17:17:17.171717171");
+    verifyTsString("2028-06-06 18:18:18.181818181");
+    verifyTsString("2029-07-07 19:19:19.191919191");
+    verifyTsString("2030-08-08 20:20:20.202020202");
+    verifyTsString("2031-09-09 21:21:21.212121212");
+
+    //test some extreme cases.
+    verifyTsString("9999-09-09 09:09:09.999999999");
+    verifyTsString("0001-01-01 00:00:00.0");
+  }
+
+  private void verifyTsString(String tsString) {
+    Timestamp ts = Timestamp.valueOf(tsString);
+    NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+    Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+    Assert.assertEquals(tsString, tsFetched.toString());
+  }
+}

Modified: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Fri Jun 20 02:55:24 2014
@@ -7,7 +7,8 @@ CREATE TABLE parquet_types_staging (
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|';
 
@@ -17,7 +18,8 @@ CREATE TABLE parquet_types (
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) STORED AS PARQUET;
 
 LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;

Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Fri Jun 20 02:55:24 2014
@@ -12,7 +12,8 @@ PREHOOK: query: CREATE TABLE parquet_typ
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|'
 PREHOOK: type: CREATETABLE
@@ -23,7 +24,8 @@ POSTHOOK: query: CREATE TABLE parquet_ty
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|'
 POSTHOOK: type: CREATETABLE
@@ -35,7 +37,8 @@ PREHOOK: query: CREATE TABLE parquet_typ
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) STORED AS PARQUET
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -45,7 +48,8 @@ POSTHOOK: query: CREATE TABLE parquet_ty
   csmallint smallint,
   cfloat float,
   cdouble double,
-  cstring1 string
+  cstring1 string,
+  t timestamp
 ) STORED AS PARQUET
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
@@ -72,6 +76,7 @@ POSTHOOK: Lineage: parquet_types.cint SI
 POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
 POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
 POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
 PREHOOK: query: SELECT * FROM parquet_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_types
@@ -80,27 +85,27 @@ POSTHOOK: query: SELECT * FROM parquet_t
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
-100	1	1	1.0	0.0	abc
-101	2	2	1.1	0.3	def
-102	3	3	1.2	0.6	ghi
-103	1	4	1.3	0.9	jkl
-104	2	5	1.4	1.2	mno
-105	3	1	1.0	1.5	pqr
-106	1	2	1.1	1.8	stu
-107	2	3	1.2	2.1	vwx
-108	3	4	1.3	2.4	yza
-109	1	5	1.4	2.7	bcd
-110	2	1	1.0	3.0	efg
-111	3	2	1.1	3.3	hij
-112	1	3	1.2	3.6	klm
-113	2	4	1.3	3.9	nop
-114	3	5	1.4	4.2	qrs
-115	1	1	1.0	4.5	tuv
-116	2	2	1.1	4.8	wxy
-117	3	3	1.2	5.1	zab
-118	1	4	1.3	5.4	cde
-119	2	5	1.4	5.7	fgh
-120	3	1	1.0	6.0	ijk
+100	1	1	1.0	0.0	abc	2011-01-01 01:01:01.111111111
+101	2	2	1.1	0.3	def	2012-02-02 02:02:02.222222222
+102	3	3	1.2	0.6	ghi	2013-03-03 03:03:03.333333333
+103	1	4	1.3	0.9	jkl	2014-04-04 04:04:04.444444444
+104	2	5	1.4	1.2	mno	2015-05-05 05:05:05.555555555
+105	3	1	1.0	1.5	pqr	2016-06-06 06:06:06.666666666
+106	1	2	1.1	1.8	stu	2017-07-07 07:07:07.777777777
+107	2	3	1.2	2.1	vwx	2018-08-08 08:08:08.888888888
+108	3	4	1.3	2.4	yza	2019-09-09 09:09:09.999999999
+109	1	5	1.4	2.7	bcd	2020-10-10 10:10:10.101010101
+110	2	1	1.0	3.0	efg	2021-11-11 11:11:11.111111111
+111	3	2	1.1	3.3	hij	2022-12-12 12:12:12.121212121
+112	1	3	1.2	3.6	klm	2023-01-02 13:13:13.131313131
+113	2	4	1.3	3.9	nop	2024-02-02 14:14:14.141414141
+114	3	5	1.4	4.2	qrs	2025-03-03 15:15:15.151515151
+115	1	1	1.0	4.5	tuv	2026-04-04 16:16:16.161616161
+116	2	2	1.1	4.8	wxy	2027-05-05 17:17:17.171717171
+117	3	3	1.2	5.1	zab	2028-06-06 18:18:18.181818181
+118	1	4	1.3	5.4	cde	2029-07-07 19:19:19.191919191
+119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202
+120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212
 PREHOOK: query: SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),