You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/06/20 04:55:25 UTC
svn commit: r1604077 - in /hive/trunk: ./ data/files/ ql/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/
ql/src/java/org/apache/h...
Author: brock
Date: Fri Jun 20 02:55:24 2014
New Revision: 1604077
URL: http://svn.apache.org/r1604077
Log:
HIVE-6394 - Implement Timestamp in ParquetSerde (Szehon via Brock)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
Removed:
hive/trunk/ql/src/test/queries/clientnegative/parquet_timestamp.q
Modified:
hive/trunk/data/files/parquet_types.txt
hive/trunk/pom.xml
hive/trunk/ql/pom.xml
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
Modified: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (original)
+++ hive/trunk/data/files/parquet_types.txt Fri Jun 20 02:55:24 2014
@@ -1,21 +1,21 @@
-100|1|1|1.0|0.0|abc
-101|2|2|1.1|0.3|def
-102|3|3|1.2|0.6|ghi
-103|1|4|1.3|0.9|jkl
-104|2|5|1.4|1.2|mno
-105|3|1|1.0|1.5|pqr
-106|1|2|1.1|1.8|stu
-107|2|3|1.2|2.1|vwx
-108|3|4|1.3|2.4|yza
-109|1|5|1.4|2.7|bcd
-110|2|1|1.0|3.0|efg
-111|3|2|1.1|3.3|hij
-112|1|3|1.2|3.6|klm
-113|2|4|1.3|3.9|nop
-114|3|5|1.4|4.2|qrs
-115|1|1|1.0|4.5|tuv
-116|2|2|1.1|4.8|wxy
-117|3|3|1.2|5.1|zab
-118|1|4|1.3|5.4|cde
-119|2|5|1.4|5.7|fgh
-120|3|1|1.0|6.0|ijk
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151
+115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212
Modified: hive/trunk/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/pom.xml?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/pom.xml (original)
+++ hive/trunk/pom.xml Fri Jun 20 02:55:24 2014
@@ -123,6 +123,7 @@
<jersey.version>1.14</jersey.version>
<jline.version>0.9.94</jline.version>
<jms.version>1.1</jms.version>
+ <jodd.version>3.5.2</jodd.version>
<json.version>20090211</json.version>
<junit.version>4.10</junit.version>
<kryo.version>2.22</kryo.version>
Modified: hive/trunk/ql/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/pom.xml?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/pom.xml (original)
+++ hive/trunk/ql/pom.xml Fri Jun 20 02:55:24 2014
@@ -167,6 +167,11 @@
<version>${jackson.version}</version>
</dependency>
<dependency>
+ <groupId>org.jodd</groupId>
+ <artifactId>jodd-core</artifactId>
+ <version>${jodd.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>${jackson.version}</version>
@@ -512,6 +517,7 @@
<include>com.twitter:parquet-hadoop-bundle</include>
<include>org.apache.thrift:libthrift</include>
<include>commons-lang:commons-lang</include>
+ <include>org.jodd:jodd-core</include>
<include>org.json:json</include>
<include>org.apache.avro:avro</include>
<include>org.apache.avro:avro-mapred</include>
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Fri Jun 20 02:55:24 2014
@@ -14,20 +14,23 @@
package org.apache.hadoop.hive.ql.io.parquet.convert;
import java.math.BigDecimal;
-
+import java.sql.Timestamp;
import java.util.ArrayList;
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
-
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
+
import parquet.column.Dictionary;
+import parquet.example.data.simple.NanoTime;
import parquet.io.api.Binary;
import parquet.io.api.Converter;
import parquet.io.api.PrimitiveConverter;
@@ -43,6 +46,7 @@ public enum ETypeConverter {
EDOUBLE_CONVERTER(Double.TYPE) {
@Override
+
Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
return new PrimitiveConverter() {
@Override
@@ -128,6 +132,19 @@ public enum ETypeConverter {
}
};
}
+ },
+ ETIMESTAMP_CONVERTER(TimestampWritable.class) {
+ @Override
+ Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+ return new BinaryConverter<TimestampWritable>(type, parent, index) {
+ @Override
+ protected TimestampWritable convert(Binary binary) {
+ NanoTime nt = NanoTime.fromBinary(binary);
+ Timestamp ts = NanoTimeUtils.getTimestamp(nt);
+ return new TimestampWritable(ts);
+ }
+ };
+ }
};
final Class<?> _type;
@@ -143,6 +160,10 @@ public enum ETypeConverter {
abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
public static Converter getNewConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+ if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
+ //TODO- cleanup once parquet support Timestamp type annotation.
+ return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
+ }
if (OriginalType.DECIMAL == type.getOriginalType()) {
return EDECIMAL_CONVERTER.getConverter(type, index, parent);
} else if (OriginalType.UTF8 == type.getOriginalType()) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Fri Jun 20 02:55:24 2014
@@ -78,7 +78,7 @@ public class HiveSchemaConverter {
// TODO : binaryTypeInfo is a byte array. Need to map it
throw new UnsupportedOperationException("Binary type not implemented");
} else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
- throw new UnsupportedOperationException("Timestamp type not implemented");
+ return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name);
} else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
throw new UnsupportedOperationException("Void type not implemented");
} else if (typeInfo instanceof DecimalTypeInfo) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Fri Jun 20 02:55:24 2014
@@ -21,6 +21,7 @@ import java.util.List;
import org.apache.hadoop.hive.ql.io.parquet.serde.primitive.ParquetPrimitiveInspectorFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -98,7 +99,7 @@ public class ArrayWritableObjectInspecto
} else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
return ParquetPrimitiveInspectorFactory.parquetShortInspector;
} else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
- throw new UnsupportedOperationException("Parquet does not support timestamp. See HIVE-6384");
+ return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
} else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384");
} else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Fri Jun 20 02:55:24 2014
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -276,6 +278,8 @@ public class ParquetHiveSerDe extends Ab
}
System.arraycopy(src, 0, tgt, bytes - src.length, src.length); // Padding leading zeroes/ones.
return new BytesWritable(tgt);
+ case TIMESTAMP:
+ return new TimestampWritable(((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj));
default:
throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java?rev=1604077&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java Fri Jun 20 02:55:24 2014
@@ -0,0 +1,87 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.utils;
+
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.TimeZone;
+
+import jodd.datetime.JDateTime;
+import jodd.datetime.TimeUtil;
+import parquet.example.data.simple.NanoTime;
+
+/**
+ * Utilities for converting from java.sql.Timestamp to parquet timestamp.
+ * This utilizes the Jodd library.
+ */
+public class NanoTimeUtils {
+ static final long NANOS_PER_SECOND = 1000000000;
+ static final long SECONDS_PER_MINUTE = 60;
+ static final long MINUTES_PER_HOUR = 60;
+
+ private static final ThreadLocal<Calendar> parquetTsCalendar = new ThreadLocal<Calendar>();
+
+ private static Calendar getCalendar() {
+ //Calendar.getInstance calculates the current-time needlessly, so cache an instance.
+ if (parquetTsCalendar.get() == null) {
+ parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
+ }
+ return parquetTsCalendar.get();
+ }
+
+ public static NanoTime getNanoTime(Timestamp ts) {
+
+ Calendar calendar = getCalendar();
+ calendar.setTime(ts);
+ JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
+ calendar.get(Calendar.MONTH) + 1, //java Calendar month is 0-based; JDateTime expects 1-based.
+ calendar.get(Calendar.DAY_OF_MONTH));
+ int days = jDateTime.getJulianDayNumber();
+
+ long hour = calendar.get(Calendar.HOUR_OF_DAY);
+ long minute = calendar.get(Calendar.MINUTE);
+ long second = calendar.get(Calendar.SECOND);
+ long nanos = ts.getNanos();
+ long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute +
+ NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour;
+ return new NanoTime(days, nanosOfDay);
+ }
+
+ public static Timestamp getTimestamp(NanoTime nt) {
+ int julianDay = nt.getJulianDay();
+ long nanosOfDay = nt.getTimeOfDayNanos();
+
+ JDateTime jDateTime = new JDateTime((double) julianDay);
+ Calendar calendar = getCalendar();
+ calendar.set(Calendar.YEAR, jDateTime.getYear());
+ calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //JDateTime month is 1-based; java Calendar is 0-based.
+ calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
+
+ long remainder = nanosOfDay;
+ int hour = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR));
+ remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR);
+ int minutes = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE));
+ remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE);
+ int seconds = (int) (remainder / (NANOS_PER_SECOND));
+ long nanos = remainder % NANOS_PER_SECOND;
+
+ calendar.set(Calendar.HOUR_OF_DAY, hour);
+ calendar.set(Calendar.MINUTE, minutes);
+ calendar.set(Calendar.SECOND, seconds);
+ Timestamp ts = new Timestamp(calendar.getTimeInMillis());
+ ts.setNanos((int) nanos);
+ return ts;
+ }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Fri Jun 20 02:55:24 2014
@@ -13,10 +13,14 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.write;
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
@@ -25,6 +29,7 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
+import parquet.example.data.simple.NanoTime;
import parquet.io.ParquetEncodingException;
import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
@@ -149,6 +154,10 @@ public class DataWritableWriter {
throw new UnsupportedOperationException("HiveDecimalWritable writing not implemented");
} else if (value instanceof BytesWritable) {
recordConsumer.addBinary((Binary.fromByteArray(((BytesWritable) value).getBytes())));
+ } else if (value instanceof TimestampWritable) {
+ Timestamp ts = ((TimestampWritable) value).getTimestamp();
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ nt.writeValue(recordConsumer);
} else {
throw new IllegalArgumentException("Unknown value type: " + value + " " + value.getClass());
}
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java?rev=1604077&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java Fri Jun 20 02:55:24 2014
@@ -0,0 +1,201 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.serde;
+
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+
+import parquet.example.data.simple.NanoTime;
+
+/**
+ * Tests util-libraries used for parquet-timestamp.
+ */
+public class TestParquetTimestampUtils extends TestCase {
+
+ public void testJulianDay() {
+ //check if May 23, 1968 is Julian Day 2440000
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1968);
+ cal.set(Calendar.MONTH, Calendar.MAY);
+ cal.set(Calendar.DAY_OF_MONTH, 23);
+ cal.set(Calendar.HOUR_OF_DAY, 0);
+ cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+ Timestamp ts = new Timestamp(cal.getTimeInMillis());
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ Assert.assertEquals(nt.getJulianDay(), 2440000);
+
+ Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+ Assert.assertEquals(tsFetched, ts);
+
+ //check if 30 Julian Days between Jan 1, 2005 and Jan 31, 2005.
+ Calendar cal1 = Calendar.getInstance();
+ cal1.set(Calendar.YEAR, 2005);
+ cal1.set(Calendar.MONTH, Calendar.JANUARY);
+ cal1.set(Calendar.DAY_OF_MONTH, 1);
+ cal1.set(Calendar.HOUR_OF_DAY, 0);
+ cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+ Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
+ NanoTime nt1 = NanoTimeUtils.getNanoTime(ts1);
+
+ Timestamp ts1Fetched = NanoTimeUtils.getTimestamp(nt1);
+ Assert.assertEquals(ts1Fetched, ts1);
+
+ Calendar cal2 = Calendar.getInstance();
+ cal2.set(Calendar.YEAR, 2005);
+ cal2.set(Calendar.MONTH, Calendar.JANUARY);
+ cal2.set(Calendar.DAY_OF_MONTH, 31);
+ cal2.set(Calendar.HOUR_OF_DAY, 0);
+ cal2.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+ Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
+ NanoTime nt2 = NanoTimeUtils.getNanoTime(ts2);
+
+ Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2);
+ Assert.assertEquals(ts2Fetched, ts2);
+ Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30);
+ }
+
+ public void testNanos() {
+ //case 1: 01:01:01.000000001
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1968);
+ cal.set(Calendar.MONTH, Calendar.MAY);
+ cal.set(Calendar.DAY_OF_MONTH, 23);
+ cal.set(Calendar.HOUR_OF_DAY, 1);
+ cal.set(Calendar.MINUTE, 1);
+ cal.set(Calendar.SECOND, 1);
+ cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+ Timestamp ts = new Timestamp(cal.getTimeInMillis());
+ ts.setNanos(1);
+
+ //(1*60*60 + 1*60 + 1) * 1e9 + 1
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
+
+ //case 2: 23:59:59.999999999
+ cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1968);
+ cal.set(Calendar.MONTH, Calendar.MAY);
+ cal.set(Calendar.DAY_OF_MONTH, 23);
+ cal.set(Calendar.HOUR_OF_DAY, 23);
+ cal.set(Calendar.MINUTE, 59);
+ cal.set(Calendar.SECOND, 59);
+ cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+ ts = new Timestamp(cal.getTimeInMillis());
+ ts.setNanos(999999999);
+
+ //(23*60*60 + 59*60 + 59)*1e9 + 999999999
+ nt = NanoTimeUtils.getNanoTime(ts);
+ Assert.assertEquals(nt.getTimeOfDayNanos(), 86399999999999L);
+
+ //case 3: verify the difference.
+ Calendar cal2 = Calendar.getInstance();
+ cal2.set(Calendar.YEAR, 1968);
+ cal2.set(Calendar.MONTH, Calendar.MAY);
+ cal2.set(Calendar.DAY_OF_MONTH, 23);
+ cal2.set(Calendar.HOUR_OF_DAY, 0);
+ cal2.set(Calendar.MINUTE, 10);
+ cal2.set(Calendar.SECOND, 0);
+ cal2.setTimeZone(TimeZone.getTimeZone("GMT"));
+ Timestamp ts2 = new Timestamp(cal2.getTimeInMillis());
+ ts2.setNanos(10);
+
+ Calendar cal1 = Calendar.getInstance();
+ cal1.set(Calendar.YEAR, 1968);
+ cal1.set(Calendar.MONTH, Calendar.MAY);
+ cal1.set(Calendar.DAY_OF_MONTH, 23);
+ cal1.set(Calendar.HOUR_OF_DAY, 0);
+ cal1.set(Calendar.MINUTE, 0);
+ cal1.set(Calendar.SECOND, 0);
+ cal1.setTimeZone(TimeZone.getTimeZone("GMT"));
+ Timestamp ts1 = new Timestamp(cal1.getTimeInMillis());
+ ts1.setNanos(1);
+
+ NanoTime n2 = NanoTimeUtils.getNanoTime(ts2);
+ NanoTime n1 = NanoTimeUtils.getNanoTime(ts1);
+
+ Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L);
+ }
+
+ public void testTimezone() {
+ Calendar cal = Calendar.getInstance();
+ cal.set(Calendar.YEAR, 1968);
+ cal.set(Calendar.MONTH, Calendar.MAY);
+ cal.set(Calendar.DAY_OF_MONTH, 23);
+ if ((TimeZone.getTimeZone("US/Pacific").inDaylightTime(new Date()))) {
+ cal.set(Calendar.HOUR_OF_DAY, 18);
+ } else {
+ cal.set(Calendar.HOUR_OF_DAY, 17);
+ }
+ cal.set(Calendar.MINUTE, 1);
+ cal.set(Calendar.SECOND, 1);
+ cal.setTimeZone(TimeZone.getTimeZone("US/Pacific"));
+ Timestamp ts = new Timestamp(cal.getTimeInMillis());
+ ts.setNanos(1);
+
+ //18:00 PST = 01:00 GMT (if daylight-savings)
+ //17:00 PST = 01:00 GMT (if not daylight savings)
+ //(1*60*60 + 1*60 + 1)*1e9 + 1
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ Assert.assertEquals(nt.getTimeOfDayNanos(), 3661000000001L);
+
+ //in both cases, this will be the next day in GMT
+ Assert.assertEquals(nt.getJulianDay(), 2440001);
+ }
+
+ public void testValues() {
+ //exercise a broad range of timestamps close to the present.
+ verifyTsString("2011-01-01 01:01:01.111111111");
+ verifyTsString("2012-02-02 02:02:02.222222222");
+ verifyTsString("2013-03-03 03:03:03.333333333");
+ verifyTsString("2014-04-04 04:04:04.444444444");
+ verifyTsString("2015-05-05 05:05:05.555555555");
+ verifyTsString("2016-06-06 06:06:06.666666666");
+ verifyTsString("2017-07-07 07:07:07.777777777");
+ verifyTsString("2018-08-08 08:08:08.888888888");
+ verifyTsString("2019-09-09 09:09:09.999999999");
+ verifyTsString("2020-10-10 10:10:10.101010101");
+ verifyTsString("2021-11-11 11:11:11.111111111");
+ verifyTsString("2022-12-12 12:12:12.121212121");
+ verifyTsString("2023-01-02 13:13:13.131313131");
+ verifyTsString("2024-02-02 14:14:14.141414141");
+ verifyTsString("2025-03-03 15:15:15.151515151");
+ verifyTsString("2026-04-04 16:16:16.161616161");
+ verifyTsString("2027-05-05 17:17:17.171717171");
+ verifyTsString("2028-06-06 18:18:18.181818181");
+ verifyTsString("2029-07-07 19:19:19.191919191");
+ verifyTsString("2030-08-08 20:20:20.202020202");
+ verifyTsString("2031-09-09 21:21:21.212121212");
+
+ //test some extreme cases.
+ verifyTsString("9999-09-09 09:09:09.999999999");
+ verifyTsString("0001-01-01 00:00:00.0");
+ }
+
+ private void verifyTsString(String tsString) {
+ Timestamp ts = Timestamp.valueOf(tsString);
+ NanoTime nt = NanoTimeUtils.getNanoTime(ts);
+ Timestamp tsFetched = NanoTimeUtils.getTimestamp(nt);
+ Assert.assertEquals(tsString, tsFetched.toString());
+ }
+}
Modified: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Fri Jun 20 02:55:24 2014
@@ -7,7 +7,8 @@ CREATE TABLE parquet_types_staging (
csmallint smallint,
cfloat float,
cdouble double,
- cstring1 string
+ cstring1 string,
+ t timestamp
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|';
@@ -17,7 +18,8 @@ CREATE TABLE parquet_types (
csmallint smallint,
cfloat float,
cdouble double,
- cstring1 string
+ cstring1 string,
+ t timestamp
) STORED AS PARQUET;
LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1604077&r1=1604076&r2=1604077&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Fri Jun 20 02:55:24 2014
@@ -12,7 +12,8 @@ PREHOOK: query: CREATE TABLE parquet_typ
csmallint smallint,
cfloat float,
cdouble double,
- cstring1 string
+ cstring1 string,
+ t timestamp
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
PREHOOK: type: CREATETABLE
@@ -23,7 +24,8 @@ POSTHOOK: query: CREATE TABLE parquet_ty
csmallint smallint,
cfloat float,
cdouble double,
- cstring1 string
+ cstring1 string,
+ t timestamp
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
POSTHOOK: type: CREATETABLE
@@ -35,7 +37,8 @@ PREHOOK: query: CREATE TABLE parquet_typ
csmallint smallint,
cfloat float,
cdouble double,
- cstring1 string
+ cstring1 string,
+ t timestamp
) STORED AS PARQUET
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
@@ -45,7 +48,8 @@ POSTHOOK: query: CREATE TABLE parquet_ty
csmallint smallint,
cfloat float,
cdouble double,
- cstring1 string
+ cstring1 string,
+ t timestamp
) STORED AS PARQUET
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
@@ -72,6 +76,7 @@ POSTHOOK: Lineage: parquet_types.cint SI
POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
PREHOOK: query: SELECT * FROM parquet_types
PREHOOK: type: QUERY
PREHOOK: Input: default@parquet_types
@@ -80,27 +85,27 @@ POSTHOOK: query: SELECT * FROM parquet_t
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_types
#### A masked pattern was here ####
-100 1 1 1.0 0.0 abc
-101 2 2 1.1 0.3 def
-102 3 3 1.2 0.6 ghi
-103 1 4 1.3 0.9 jkl
-104 2 5 1.4 1.2 mno
-105 3 1 1.0 1.5 pqr
-106 1 2 1.1 1.8 stu
-107 2 3 1.2 2.1 vwx
-108 3 4 1.3 2.4 yza
-109 1 5 1.4 2.7 bcd
-110 2 1 1.0 3.0 efg
-111 3 2 1.1 3.3 hij
-112 1 3 1.2 3.6 klm
-113 2 4 1.3 3.9 nop
-114 3 5 1.4 4.2 qrs
-115 1 1 1.0 4.5 tuv
-116 2 2 1.1 4.8 wxy
-117 3 3 1.2 5.1 zab
-118 1 4 1.3 5.4 cde
-119 2 5 1.4 5.7 fgh
-120 3 1 1.0 6.0 ijk
+100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111
+101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222
+102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333
+103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444
+104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555
+105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666
+106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777
+107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888
+108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999
+109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101
+110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111
+111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121
+112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131
+113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141
+114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151
+115 1 1 1.0 4.5 tuv 2026-04-04 16:16:16.161616161
+116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171
+117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181
+118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191
+119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202
+120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212
PREHOOK: query: SELECT ctinyint,
MAX(cint),
MIN(csmallint),