You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/06/22 18:55:58 UTC

svn commit: r1604612 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/parquet/convert/ java/org/apache/hadoop/hive/ql/io/parquet/timestamp/ java/org/apache/hadoop/hive/ql/io/parquet/utils/ java/org/apache/hadoop/hive/ql/io/parquet/write/ test...

Author: brock
Date: Sun Jun 22 16:55:58 2014
New Revision: 1604612

URL: http://svn.apache.org/r1604612
Log:
HIVE-7263 - Missing fixes from review of parquet-timestamp (Szehon via Brock)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
Removed:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1604612&r1=1604611&r2=1604612&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Sun Jun 22 16:55:58 2014
@@ -17,7 +17,8 @@ import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 
-import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -30,7 +31,6 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
 import parquet.column.Dictionary;
-import parquet.example.data.simple.NanoTime;
 import parquet.io.api.Binary;
 import parquet.io.api.Converter;
 import parquet.io.api.PrimitiveConverter;

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java?rev=1604612&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java Sun Jun 22 16:55:58 2014
@@ -0,0 +1,63 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.timestamp;
+
+import java.nio.ByteBuffer;
+
+import parquet.Preconditions;
+import parquet.io.api.Binary;
+import parquet.io.api.RecordConsumer;
+/**
+ * Value holder for a parquet-timestamp: a day number (int) plus the nanoseconds elapsed within that day (long).
+ * Converts to and from the 12-byte binary form, laid out as the 4-byte int followed by the 8-byte long.
+ */
+public class NanoTime {
+  private final int julianDay; // day component; NanoTimeUtils fills this with a Julian day number
+  private final long timeOfDayNanos; // nanoseconds elapsed since the start of that day
+  public static NanoTime fromBinary(Binary bytes) { // decodes the layout written by toBinary(); rejects any length other than 12
+    Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes");
+    ByteBuffer buf = bytes.toByteBuffer();
+    return new NanoTime(buf.getInt(), buf.getLong()); // arguments evaluate left to right: day is read first, then nanos
+  }
+
+  public NanoTime(int julianDay, long timeOfDayNanos) { // stores both values as given; no range validation is done
+    this.julianDay = julianDay;
+    this.timeOfDayNanos = timeOfDayNanos;
+  }
+
+  public int getJulianDay() {
+    return julianDay;
+  }
+
+  public long getTimeOfDayNanos() {
+    return timeOfDayNanos;
+  }
+
+  public Binary toBinary() { // encodes this value as 12 bytes: day (int) then nanos (long)
+    ByteBuffer buf = ByteBuffer.allocate(12); // 4 bytes for the int + 8 bytes for the long
+    buf.putInt(julianDay); // freshly allocated buffers are big-endian - NOTE(review): confirm other Parquet writers use the same order
+    buf.putLong(timeOfDayNanos);
+    buf.flip(); // position back to 0, limit at 12, so the bytes just written are the readable content
+    return Binary.fromByteBuffer(buf);
+  }
+
+  public void writeValue(RecordConsumer recordConsumer) { // hands the binary form to the consumer as one binary value
+    recordConsumer.addBinary(toBinary());
+  }
+
+  @Override
+  public String toString() {
+    return "NanoTime{julianDay="+julianDay+", timeOfDayNanos="+timeOfDayNanos+"}";
+  }
+}
\ No newline at end of file

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java?rev=1604612&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java Sun Jun 22 16:55:58 2014
@@ -0,0 +1,84 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.timestamp;
+
+import java.sql.Timestamp;
+import java.util.Calendar;
+import java.util.TimeZone;
+
+import jodd.datetime.JDateTime;
+
+/**
+ * Utilities for converting between java.sql.Timestamp and the parquet timestamp (NanoTime); calendar fields are read in GMT.
+ * This utilizes the Jodd library (JDateTime) to translate between a calendar date and a Julian day number.
+ */
+public class NanoTimeUtils {
+   static final long NANOS_PER_SECOND = 1000000000;
+   static final long SECONDS_PER_MINUTE = 60;
+   static final long MINUTES_PER_HOUR = 60;
+
+   private static final ThreadLocal<Calendar> parquetTsCalendar = new ThreadLocal<Calendar>(); // one GMT calendar per thread, created on first use
+
+   private static Calendar getCalendar() {
+     //Calendar.getInstance computes the current time needlessly, so create it once per thread and reuse it.
+     if (parquetTsCalendar.get() == null) {
+       parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
+     }
+     return parquetTsCalendar.get(); // callers overwrite the fields they need; the instance keeps state between calls
+   }
+
+   public static NanoTime getNanoTime(Timestamp ts) { // Timestamp -> (Julian day number, nanoseconds of day), using GMT fields
+
+     Calendar calendar = getCalendar();
+     calendar.setTime(ts);
+     JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR),
+       calendar.get(Calendar.MONTH) + 1,  //Calendar.MONTH is 0-based; add 1 for the month value passed to JDateTime.
+       calendar.get(Calendar.DAY_OF_MONTH));
+     int days = jDateTime.getJulianDayNumber();
+
+     long hour = calendar.get(Calendar.HOUR_OF_DAY);
+     long minute = calendar.get(Calendar.MINUTE);
+     long second = calendar.get(Calendar.SECOND);
+     long nanos = ts.getNanos(); // whole fractional second (milliseconds included), so Calendar.MILLISECOND is not read
+     long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute +
+         NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour;
+     return new NanoTime(days, nanosOfDay);
+   }
+
+   public static Timestamp getTimestamp(NanoTime nt) { // (Julian day number, nanoseconds of day) -> Timestamp, using GMT fields
+     int julianDay = nt.getJulianDay();
+     long nanosOfDay = nt.getTimeOfDayNanos();
+
+     JDateTime jDateTime = new JDateTime((double) julianDay); // used only to recover year/month/day from the day number
+     Calendar calendar = getCalendar();
+     calendar.set(Calendar.YEAR, jDateTime.getYear());
+     calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //Calendar.MONTH is 0-based; subtract 1 from the JDateTime month.
+     calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay());
+
+     long remainder = nanosOfDay; // peel off hours, then minutes, then seconds; what is left is the sub-second nanos
+     int hour = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR));
+     remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR);
+     int minutes = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE));
+     remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE);
+     int seconds = (int) (remainder / (NANOS_PER_SECOND));
+     long nanos = remainder % NANOS_PER_SECOND;
+
+     calendar.set(Calendar.HOUR_OF_DAY, hour);
+     calendar.set(Calendar.MINUTE, minutes);
+     calendar.set(Calendar.SECOND, seconds);
+     Timestamp ts = new Timestamp(calendar.getTimeInMillis()); // Calendar.MILLISECOND is never set here and may hold a stale value
+     ts.setNanos((int) nanos); // replaces the entire fractional second, discarding whatever milliseconds the calendar carried
+     return ts;
+   }
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java?rev=1604612&r1=1604611&r2=1604612&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Sun Jun 22 16:55:58 2014
@@ -15,7 +15,8 @@ package org.apache.hadoop.hive.ql.io.par
 
 import java.sql.Timestamp;
 
-import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -29,7 +30,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Writable;
 
-import parquet.example.data.simple.NanoTime;
 import parquet.io.ParquetEncodingException;
 import parquet.io.api.Binary;
 import parquet.io.api.RecordConsumer;

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java?rev=1604612&r1=1604611&r2=1604612&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java Sun Jun 22 16:55:58 2014
@@ -21,9 +21,10 @@ import java.util.TimeZone;
 import junit.framework.Assert;
 import junit.framework.TestCase;
 
-import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+
 
-import parquet.example.data.simple.NanoTime;
 
 /**
  * Tests util-libraries used for parquet-timestamp.