You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/10/08 22:44:51 UTC

[orc] branch branch-1.5 updated (4c76965 -> 2f201a5)

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a change to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git.


    from 4c76965  ORC-550: Use default system time zone if it is not present in the file footer
     new df55c2f  ORC-455. Support timestampformat option in JsonReader
     new 2f201a5  ORC-526: Make orc-tools convert respect second fractions.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/orc/tools/convert/ConvertTool.java  |  2 +-
 .../org/apache/orc/tools/convert/CsvReader.java    | 12 +++++---
 .../org/apache/orc/tools/convert/JsonReader.java   | 34 +++++++++++++---------
 .../apache/orc/tools/convert/TestCsvReader.java    | 11 +++----
 .../apache/orc/tools/convert/TestJsonReader.java   | 30 +++++++++++++++++++
 5 files changed, 66 insertions(+), 23 deletions(-)
 create mode 100644 java/tools/src/test/org/apache/orc/tools/convert/TestJsonReader.java


[orc] 02/02: ORC-526: Make orc-tools convert respect second fractions.

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 2f201a515227a1dfb2ac1934d9ae1895c482c94c
Author: Yukihiro Okada <ca...@gmail.com>
AuthorDate: Sat Aug 31 13:20:57 2019 +0900

    ORC-526: Make orc-tools convert respect second fractions.
    
    Fixes #425
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 .../org/apache/orc/tools/convert/CsvReader.java    | 12 ++++++---
 .../org/apache/orc/tools/convert/JsonReader.java   | 10 ++++++--
 .../apache/orc/tools/convert/TestCsvReader.java    | 11 ++++----
 .../apache/orc/tools/convert/TestJsonReader.java   | 30 ++++++++++++++++++++++
 4 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/java/tools/src/java/org/apache/orc/tools/convert/CsvReader.java b/java/tools/src/java/org/apache/orc/tools/convert/CsvReader.java
index 6644150..45959f2 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/CsvReader.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/CsvReader.java
@@ -253,11 +253,15 @@ public class CsvReader implements RecordReader {
             dateTimeFormatter.parseBest(values[offset],
                 ZonedDateTime.FROM, LocalDateTime.FROM);
         if (temporalAccessor instanceof ZonedDateTime) {
-          vector.set(row, new Timestamp(
-              ((ZonedDateTime) temporalAccessor).toEpochSecond() * 1000L));
+          ZonedDateTime zonedDateTime = ((ZonedDateTime) temporalAccessor);
+          Timestamp timestamp = new Timestamp(zonedDateTime.toEpochSecond() * 1000L);
+          timestamp.setNanos(zonedDateTime.getNano());
+          vector.set(row, timestamp);
         } else if (temporalAccessor instanceof LocalDateTime) {
-          vector.set(row, new Timestamp(((LocalDateTime) temporalAccessor)
-              .atZone(ZoneId.systemDefault()).toEpochSecond() * 1000L));
+          ZonedDateTime tz = ((LocalDateTime) temporalAccessor).atZone(ZoneId.systemDefault());
+          Timestamp timestamp = new Timestamp(tz.toEpochSecond() * 1000L);
+          timestamp.setNanos(tz.getNano());
+          vector.set(row, timestamp);
         } else {
           column.noNulls = false;
           column.isNull[row] = true;
diff --git a/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java b/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
index d6e188a..cdb9270 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
@@ -147,9 +147,15 @@ public class JsonReader implements RecordReader {
         TemporalAccessor temporalAccessor = dateTimeFormatter.parseBest(value.getAsString(),
           ZonedDateTime.FROM, LocalDateTime.FROM);
         if (temporalAccessor instanceof ZonedDateTime) {
-          vector.set(row, new Timestamp(((ZonedDateTime) temporalAccessor).toEpochSecond() * 1000L));
+          ZonedDateTime zonedDateTime = ((ZonedDateTime) temporalAccessor);
+          Timestamp timestamp = new Timestamp(zonedDateTime.toEpochSecond() * 1000L);
+          timestamp.setNanos(zonedDateTime.getNano());
+          vector.set(row, timestamp);
         } else if (temporalAccessor instanceof LocalDateTime) {
-          vector.set(row, new Timestamp(((LocalDateTime) temporalAccessor).atZone(ZoneId.systemDefault()).toEpochSecond() * 1000L));
+          ZonedDateTime tz = ((LocalDateTime) temporalAccessor).atZone(ZoneId.systemDefault());
+          Timestamp timestamp = new Timestamp(tz.toEpochSecond() * 1000L);
+          timestamp.setNanos(tz.getNano());
+          vector.set(row, timestamp);
         } else {
           vect.noNulls = false;
           vect.isNull[row] = true;
diff --git a/java/tools/src/test/org/apache/orc/tools/convert/TestCsvReader.java b/java/tools/src/test/org/apache/orc/tools/convert/TestCsvReader.java
index 12a72dc..89efe5f 100644
--- a/java/tools/src/test/org/apache/orc/tools/convert/TestCsvReader.java
+++ b/java/tools/src/test/org/apache/orc/tools/convert/TestCsvReader.java
@@ -190,10 +190,11 @@ public class TestCsvReader {
 
   @Test
   public void testCustomTimestampFormat() throws Exception {
-    String tsFormat = "d[d] MMM yyyy HH:mm:ss";
+    String tsFormat = "d[d] MMM yyyy HH:mm:ss.SSSSSS";
+
     StringReader input = new StringReader(
-            "'21 Mar 2018 12:23:34'\n" +
-                    "'3 Feb 2018 18:04:51'\n"
+            "'21 Mar 2018 12:23:34.123456'\n" +
+                    "'3 Feb 2018 18:04:51.456789'\n"
     );
     TypeDescription schema = TypeDescription.fromString(
             "struct<a:timestamp>");
@@ -203,7 +204,7 @@ public class TestCsvReader {
     assertEquals(true, reader.nextBatch(batch));
     assertEquals(2, batch.size);
     TimestampColumnVector cv = (TimestampColumnVector) batch.cols[0];
-    assertEquals("2018-03-21 12:23:34.0", cv.asScratchTimestamp(0).toString());
-    assertEquals("2018-02-03 18:04:51.0", cv.asScratchTimestamp(1).toString());
+    assertEquals("2018-03-21 12:23:34.123456", cv.asScratchTimestamp(0).toString());
+    assertEquals("2018-02-03 18:04:51.456789", cv.asScratchTimestamp(1).toString());
   }
 }
diff --git a/java/tools/src/test/org/apache/orc/tools/convert/TestJsonReader.java b/java/tools/src/test/org/apache/orc/tools/convert/TestJsonReader.java
new file mode 100644
index 0000000..10529d8
--- /dev/null
+++ b/java/tools/src/test/org/apache/orc/tools/convert/TestJsonReader.java
@@ -0,0 +1,30 @@
+package org.apache.orc.tools.convert;
+
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.orc.TypeDescription;
+import org.junit.Test;
+
+import java.io.StringReader;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestJsonReader {
+    @Test
+    public void testCustomTimestampFormat() throws Exception {
+        String tsFormat = "yyyy-MM-dd HH:mm:ss.SSSSSS";
+
+        String s = "{\"a\":\"2018-03-21 12:23:34.123456\"}\n" +
+                "{\"a\":\"2018-02-03 18:04:51.456789\"}\n";
+        StringReader input = new StringReader(s);
+        TypeDescription schema = TypeDescription.fromString(
+                "struct<a:timestamp>");
+        JsonReader reader = new JsonReader(input, null, 1, schema, tsFormat);
+        VectorizedRowBatch batch = schema.createRowBatch(2);
+        assertEquals(true, reader.nextBatch(batch));
+        assertEquals(2, batch.size);
+        TimestampColumnVector cv = (TimestampColumnVector) batch.cols[0];
+        assertEquals("2018-03-21 12:23:34.123456", cv.asScratchTimestamp(0).toString());
+        assertEquals("2018-02-03 18:04:51.456789", cv.asScratchTimestamp(1).toString());
+    }
+}


[orc] 01/02: ORC-455. Support timestampformat option in JsonReader

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git

commit df55c2f8f2ebfd5bfd0c864fa8ae9833da23af7e
Author: yuokada <ca...@gmail.com>
AuthorDate: Sat Jan 12 12:02:20 2019 +0900

    ORC-455. Support timestampformat option in JsonReader
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 .../org/apache/orc/tools/convert/ConvertTool.java  |  2 +-
 .../org/apache/orc/tools/convert/JsonReader.java   | 24 ++++++++++++----------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
index 02d7ee8..2e6ba10 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/ConvertTool.java
@@ -146,7 +146,7 @@ public class ConvertTool {
         }
         case JSON: {
           FSDataInputStream underlying = filesystem.open(path);
-          return new JsonReader(getReader(underlying), underlying, size, schema);
+          return new JsonReader(getReader(underlying), underlying, size, schema, timestampFormat);
         }
         case CSV: {
           FSDataInputStream underlying = filesystem.open(path);
diff --git a/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java b/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
index c020d11..d6e188a 100644
--- a/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
+++ b/java/tools/src/java/org/apache/orc/tools/convert/JsonReader.java
@@ -58,8 +58,6 @@ import java.util.Map;
 import java.util.zip.GZIPInputStream;
 
 public class JsonReader implements RecordReader {
-  private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern(
-    "yyyy[[-][/]]MM[[-][/]]dd[['T'][ ]]HH:mm:ss[ ][XXX][X]");
 
   private final TypeDescription schema;
   private final Iterator<JsonElement> parser;
@@ -67,6 +65,7 @@ public class JsonReader implements RecordReader {
   private final long totalSize;
   private final FSDataInputStream input;
   private long rowNumber = 0;
+  private final DateTimeFormatter dateTimeFormatter;
 
   interface JsonConverter {
     void convert(JsonElement value, ColumnVector vect, int row);
@@ -138,14 +137,14 @@ public class JsonReader implements RecordReader {
     }
   }
 
-  static class TimestampColumnConverter implements JsonConverter {
+  class TimestampColumnConverter implements JsonConverter {
     public void convert(JsonElement value, ColumnVector vect, int row) {
       if (value == null || value.isJsonNull()) {
         vect.noNulls = false;
         vect.isNull[row] = true;
       } else {
         TimestampColumnVector vector = (TimestampColumnVector) vect;
-        TemporalAccessor temporalAccessor = DATE_TIME_FORMATTER.parseBest(value.getAsString(),
+        TemporalAccessor temporalAccessor = dateTimeFormatter.parseBest(value.getAsString(),
           ZonedDateTime.FROM, LocalDateTime.FROM);
         if (temporalAccessor instanceof ZonedDateTime) {
           vector.set(row, new Timestamp(((ZonedDateTime) temporalAccessor).toEpochSecond() * 1000L));
@@ -171,7 +170,7 @@ public class JsonReader implements RecordReader {
     }
   }
 
-  static class StructColumnConverter implements JsonConverter {
+  class StructColumnConverter implements JsonConverter {
     private JsonConverter[] childrenConverters;
     private List<String> fieldNames;
 
@@ -199,7 +198,7 @@ public class JsonReader implements RecordReader {
     }
   }
 
-  static class ListColumnConverter implements JsonConverter {
+  class ListColumnConverter implements JsonConverter {
     private JsonConverter childrenConverter;
 
     public ListColumnConverter(TypeDescription schema) {
@@ -225,7 +224,7 @@ public class JsonReader implements RecordReader {
     }
   }
 
-  static class MapColumnConverter implements JsonConverter {
+  class MapColumnConverter implements JsonConverter {
     private JsonConverter keyConverter;
     private JsonConverter valueConverter;
 
@@ -259,7 +258,7 @@ public class JsonReader implements RecordReader {
     }
   }
 
-  static JsonConverter createConverter(TypeDescription schema) {
+  JsonConverter createConverter(TypeDescription schema) {
     switch (schema.getCategory()) {
       case BYTE:
       case SHORT:
@@ -295,14 +294,16 @@ public class JsonReader implements RecordReader {
   public JsonReader(Reader reader,
                     FSDataInputStream underlying,
                     long size,
-                    TypeDescription schema) throws IOException {
-    this(new JsonStreamParser(reader), underlying, size, schema);
+                    TypeDescription schema,
+                    String timestampFormat) throws IOException {
+    this(new JsonStreamParser(reader), underlying, size, schema, timestampFormat);
   }
 
   public JsonReader(Iterator<JsonElement> parser,
                     FSDataInputStream underlying,
                     long size,
-                    TypeDescription schema) throws IOException {
+                    TypeDescription schema,
+                    String timestampFormat) throws IOException {
     this.schema = schema;
     if (schema.getCategory() != TypeDescription.Category.STRUCT) {
       throw new IllegalArgumentException("Root must be struct - " + schema);
@@ -310,6 +311,7 @@ public class JsonReader implements RecordReader {
     this.input = underlying;
     this.totalSize = size;
     this.parser = parser;
+    this.dateTimeFormatter = DateTimeFormatter.ofPattern(timestampFormat);
     List<TypeDescription> fieldTypes = schema.getChildren();
     converters = new JsonConverter[fieldTypes.size()];
     for(int c = 0; c < converters.length; ++c) {