You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/07/24 20:36:32 UTC

orc git commit: ORC-212. Fix timestamp pattern and conversion in schema/convert tool

Repository: orc
Updated Branches:
  refs/heads/master 590245a0b -> b010f4dee


ORC-212. Fix timestamp pattern and conversion in schema/convert tool

Fixes #130

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/b010f4de
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/b010f4de
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/b010f4de

Branch: refs/heads/master
Commit: b010f4dee805929441c1913edb5dd6bbe06d321b
Parents: 590245a
Author: Seshu Pasam <sp...@uptycs.com>
Authored: Wed Jun 7 14:35:47 2017 -0400
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Jul 24 13:32:25 2017 -0700

----------------------------------------------------------------------
 java/pom.xml                                    |  5 +++++
 java/tools/pom.xml                              |  4 ++++
 .../apache/orc/tools/convert/JsonReader.java    | 20 ++++++++++++++++++--
 .../apache/orc/tools/json/JsonSchemaFinder.java |  2 +-
 .../orc/tools/json/TestJsonSchemaFinder.java    | 10 ++++++++--
 5 files changed, 36 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/b010f4de/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index c0c212a..698b5ba 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -631,6 +631,11 @@
         <artifactId>slf4j-simple</artifactId>
         <version>1.7.5</version>
       </dependency>
+      <dependency>
+        <groupId>org.threeten</groupId>
+        <artifactId>threetenbp</artifactId>
+        <version>1.3.5</version>
+      </dependency>
 
       <!-- test inter-project -->
       <dependency>

http://git-wip-us.apache.org/repos/asf/orc/blob/b010f4de/java/tools/pom.xml
----------------------------------------------------------------------
diff --git a/java/tools/pom.xml b/java/tools/pom.xml
index 6366e1e..9f448ee 100644
--- a/java/tools/pom.xml
+++ b/java/tools/pom.xml
@@ -74,6 +74,10 @@
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.threeten</groupId>
+      <artifactId>threetenbp</artifactId>
+    </dependency>
 
     <!-- test inter-project -->
     <dependency>

http://git-wip-us.apache.org/repos/asf/orc/blob/b010f4de/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java b/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
index 9da5e3a..5daec01 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
@@ -38,6 +38,11 @@ import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.orc.RecordReader;
 import org.apache.orc.TypeDescription;
+import org.threeten.bp.LocalDateTime;
+import org.threeten.bp.ZonedDateTime;
+import org.threeten.bp.ZoneId;
+import org.threeten.bp.format.DateTimeFormatter;
+import org.threeten.bp.temporal.TemporalAccessor;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -49,6 +54,9 @@ import java.util.List;
 import java.util.zip.GZIPInputStream;
 
 public class JsonReader implements RecordReader {
+  private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern(
+    "yyyy[[-][/]]MM[[-][/]]dd[['T'][ ]]HH:mm:ss[ ][XXX][X]");
+
   private final TypeDescription schema;
   private final JsonStreamParser parser;
   private final JsonConverter[] converters;
@@ -133,8 +141,16 @@ public class JsonReader implements RecordReader {
         vect.isNull[row] = true;
       } else {
         TimestampColumnVector vector = (TimestampColumnVector) vect;
-        vector.set(row, Timestamp.valueOf(value.getAsString()
-            .replaceAll("[TZ]", " ")));
+        TemporalAccessor temporalAccessor = DATE_TIME_FORMATTER.parseBest(value.getAsString(),
+          ZonedDateTime.FROM, LocalDateTime.FROM);
+        if (temporalAccessor instanceof ZonedDateTime) {
+          vector.set(row, new Timestamp(((ZonedDateTime) temporalAccessor).toEpochSecond() * 1000L));
+        } else if (temporalAccessor instanceof LocalDateTime) {
+          vector.set(row, new Timestamp(((LocalDateTime) temporalAccessor).atZone(ZoneId.systemDefault()).toEpochSecond() * 1000L));
+        } else {
+          vect.noNulls = false;
+          vect.isNull[row] = true;
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/orc/blob/b010f4de/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
index 66254fe..de36254 100644
--- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
+++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java
@@ -59,7 +59,7 @@ public class JsonSchemaFinder {
   private static final Pattern TIMESTAMP_PATTERN =
       Pattern.compile("^[\"]?([0-9]{4}[-/][0-9]{2}[-/][0-9]{2})[T ]" +
           "([0-9]{2}:[0-9]{2}:[0-9]{2})" +
-          "(([ ][-+]?[0-9]{2}([:][0-9]{2})?)|Z)?[\"]?$");
+          "(( [-+]?|[-+])([0-9]{2}(:[0-9]{2})?)|Z)?[\"]?$");
   private static final Pattern DECIMAL_PATTERN =
       Pattern.compile("^-?(?<int>[0-9]+)([.](?<fraction>[0-9]+))?$");
   private static final int INDENT = 2;

http://git-wip-us.apache.org/repos/asf/orc/blob/b010f4de/java/tools/src/test/org/apache/orc/tools/json/TestJsonSchemaFinder.java
----------------------------------------------------------------------
diff --git a/java/tools/src/test/org/apache/orc/tools/json/TestJsonSchemaFinder.java b/java/tools/src/test/org/apache/orc/tools/json/TestJsonSchemaFinder.java
index 7dd9367..6e54881 100644
--- a/java/tools/src/test/org/apache/orc/tools/json/TestJsonSchemaFinder.java
+++ b/java/tools/src/test/org/apache/orc/tools/json/TestJsonSchemaFinder.java
@@ -52,9 +52,15 @@ public class TestJsonSchemaFinder {
     assertEquals("string",
         JsonSchemaFinder.pickType(new JsonPrimitive("2016/01/05")).toString());
     assertEquals("timestamp",
-        JsonSchemaFinder.pickType(new JsonPrimitive("2016-01-01 56:00:00 +08")).toString());
+        JsonSchemaFinder.pickType(new JsonPrimitive("2016-01-01 16:00:00 +08")).toString());
     assertEquals("timestamp",
-        JsonSchemaFinder.pickType(new JsonPrimitive("2016-01-01 56:00:00 -08:30")).toString());
+        JsonSchemaFinder.pickType(new JsonPrimitive("2016-01-01 16:00:00+08")).toString());
+    assertEquals("string",
+        JsonSchemaFinder.pickType(new JsonPrimitive("2016-01-01 16:00:0008")).toString());
+    assertEquals("timestamp",
+        JsonSchemaFinder.pickType(new JsonPrimitive("2016-01-01 06:00:00 -08:30")).toString());
+    assertEquals("timestamp",
+        JsonSchemaFinder.pickType(new JsonPrimitive("2017-05-31T12:44:40-04:00")).toString());
   }
 
   @Test