You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/06/22 18:55:58 UTC
svn commit: r1604612 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/io/parquet/convert/
java/org/apache/hadoop/hive/ql/io/parquet/timestamp/
java/org/apache/hadoop/hive/ql/io/parquet/utils/
java/org/apache/hadoop/hive/ql/io/parquet/write/ test...
Author: brock
Date: Sun Jun 22 16:55:58 2014
New Revision: 1604612
URL: http://svn.apache.org/r1604612
Log:
HIVE-7263 - Missing fixes from review of parquet-timestamp (Szehon via Brock)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
Removed:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1604612&r1=1604611&r2=1604612&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Sun Jun 22 16:55:58 2014
@@ -17,7 +17,8 @@ import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
-import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -30,7 +31,6 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import parquet.column.Dictionary;
-import parquet.example.data.simple.NanoTime;
import parquet.io.api.Binary;
import parquet.io.api.Converter;
import parquet.io.api.PrimitiveConverter;
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java?rev=1604612&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java Sun Jun 22 16:55:58 2014
@@ -0,0 +1,63 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.timestamp;
+
+import java.nio.ByteBuffer;
+
+import parquet.Preconditions;
+import parquet.io.api.Binary;
+import parquet.io.api.RecordConsumer;
+/**
+ * Provides a wrapper representing a parquet-timestamp, with methods to
+ * convert to and from binary.
+ */
+public class NanoTime {
+ private final int julianDay;
+ private final long timeOfDayNanos;
+ public static NanoTime fromBinary(Binary bytes) {
+ Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes");
+ ByteBuffer buf = bytes.toByteBuffer();
+ return new NanoTime(buf.getInt(), buf.getLong());
+ }
+
+ public NanoTime(int julianDay, long timeOfDayNanos) {
+ this.julianDay = julianDay;
+ this.timeOfDayNanos = timeOfDayNanos;
+ }
+
+ public int getJulianDay() {
+ return julianDay;
+ }
+
+ public long getTimeOfDayNanos() {
+ return timeOfDayNanos;
+ }
+
+ public Binary toBinary() {
+ ByteBuffer buf = ByteBuffer.allocate(12);
+ buf.putInt(julianDay);
+ buf.putLong(timeOfDayNanos);
+ buf.flip();
+ return Binary.fromByteBuffer(buf);
+ }
+
+ public void writeValue(RecordConsumer recordConsumer) {
+ recordConsumer.addBinary(toBinary());
+ }
+
+ @Override
+ public String toString() {
+ return "NanoTime{julianDay="+julianDay+", timeOfDayNanos="+timeOfDayNanos+"}";
+ }
+}
\ No newline at end of file
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java?rev=1604612&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java Sun Jun 22 16:55:58 2014
@@ -0,0 +1,84 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.timestamp;
+
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.TimeZone;
+
+import jodd.datetime.JDateTime;
+
+/**
+ * Utilities for converting between java.sql.Timestamp and the parquet
+ * timestamp representation ({@link NanoTime}).
+ * Julian day arithmetic is delegated to the Jodd library's JDateTime; the
+ * calendar fields are evaluated in GMT.
+ */
+public class NanoTimeUtils {
+  static final long NANOS_PER_SECOND = 1000000000;
+  static final long SECONDS_PER_MINUTE = 60;
+  static final long MINUTES_PER_HOUR = 60;
+
+  private static final long NANOS_PER_MINUTE = NANOS_PER_SECOND * SECONDS_PER_MINUTE;
+  private static final long NANOS_PER_HOUR = NANOS_PER_MINUTE * MINUTES_PER_HOUR;
+
+  // Calendar.getInstance calculates the current time needlessly on every
+  // call, so one GMT instance is cached per thread and reused.
+  private static final ThreadLocal<Calendar> parquetTsCalendar = new ThreadLocal<Calendar>() {
+    @Override
+    protected Calendar initialValue() {
+      return Calendar.getInstance(TimeZone.getTimeZone("GMT"));
+    }
+  };
+
+  /** Returns the calling thread's cached GMT calendar. */
+  private static Calendar getCalendar() {
+    return parquetTsCalendar.get();
+  }
+
+  /**
+   * Converts a timestamp into its parquet representation.
+   *
+   * @param ts the timestamp to convert
+   * @return the Julian day of the timestamp's GMT date together with the
+   *         nanoseconds elapsed since GMT midnight of that day
+   */
+  public static NanoTime getNanoTime(Timestamp ts) {
+    Calendar calendar = getCalendar();
+    // setTime recomputes every calendar field, so no stale state survives.
+    calendar.setTime(ts);
+    JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
+        calendar.get(Calendar.MONTH) + 1, // Calendar months are 0-based, Jodd months are 1-based.
+        calendar.get(Calendar.DAY_OF_MONTH));
+    int days = jDateTime.getJulianDayNumber();
+
+    long hour = calendar.get(Calendar.HOUR_OF_DAY);
+    long minute = calendar.get(Calendar.MINUTE);
+    long second = calendar.get(Calendar.SECOND);
+    // The sub-second part comes from the Timestamp itself, which keeps full
+    // nanosecond precision.
+    long nanos = ts.getNanos();
+    long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_MINUTE * minute
+        + NANOS_PER_HOUR * hour;
+    return new NanoTime(days, nanosOfDay);
+  }
+
+  /**
+   * Converts a parquet timestamp back into a java.sql.Timestamp.
+   *
+   * @param nt the parquet timestamp (Julian day plus nanos-of-day)
+   * @return the equivalent timestamp, with date and time fields interpreted in GMT
+   */
+  public static Timestamp getTimestamp(NanoTime nt) {
+    int julianDay = nt.getJulianDay();
+    long nanosOfDay = nt.getTimeOfDayNanos();
+
+    JDateTime jDateTime = new JDateTime((double) julianDay);
+    Calendar calendar = getCalendar();
+    // The calendar is reused across calls on this thread; clear it so fields
+    // that are not set below (e.g. ERA, MILLISECOND) cannot leak in from an
+    // earlier conversion.
+    calendar.clear();
+    calendar.set(Calendar.YEAR, jDateTime.getYear());
+    calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); // Jodd months are 1-based, Calendar months are 0-based.
+    calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
+
+    // Split nanos-of-day into hour / minute / second / sub-second nanos.
+    long remainder = nanosOfDay;
+    int hour = (int) (remainder / NANOS_PER_HOUR);
+    remainder = remainder % NANOS_PER_HOUR;
+    int minutes = (int) (remainder / NANOS_PER_MINUTE);
+    remainder = remainder % NANOS_PER_MINUTE;
+    int seconds = (int) (remainder / NANOS_PER_SECOND);
+    long nanos = remainder % NANOS_PER_SECOND;
+
+    calendar.set(Calendar.HOUR_OF_DAY, hour);
+    calendar.set(Calendar.MINUTE, minutes);
+    calendar.set(Calendar.SECOND, seconds);
+    Timestamp ts = new Timestamp(calendar.getTimeInMillis());
+    // The calendar only carries whole seconds here; the fractional second is
+    // applied directly on the Timestamp.
+    ts.setNanos((int) nanos);
+    return ts;
+  }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1604612&r1=1604611&r2=1604612&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Sun Jun 22 16:55:58 2014
@@ -15,7 +15,8 @@ package org.apache.hadoop.hive.ql.io.par
import java.sql.Timestamp;
-import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -29,7 +30,6 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
-import parquet.example.data.simple.NanoTime;
import parquet.io.ParquetEncodingException;
import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java?rev=1604612&r1=1604611&r2=1604612&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java Sun Jun 22 16:55:58 2014
@@ -21,9 +21,10 @@ import java.util.TimeZone;
import junit.framework.Assert;
import junit.framework.TestCase;
-import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+
-import parquet.example.data.simple.NanoTime;
/**
* Tests util-libraries used for parquet-timestamp.