You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ja...@apache.org on 2015/09/14 07:28:55 UTC

[08/15] drill git commit: DRILL-2879: Enhancing extended json support for date in millies and binary with type info

DRILL-2879: Enhancing extended json support for date in millies and binary with type info

Ignore project push down Mongo test until test completes correctly on Linux


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/22148940
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/22148940
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/22148940

Branch: refs/heads/master
Commit: 221489406505d738f1746dbf49f8dd0d67df3fc2
Parents: 97615e5
Author: Kamesh <ka...@gmail.com>
Authored: Wed Jul 15 16:34:41 2015 +0530
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Sun Sep 13 21:58:07 2015 -0700

----------------------------------------------------------------------
 .../store/mongo/TestMongoProjectPushDown.java   | 11 ++-
 .../exec/store/easy/json/JSONRecordReader.java  |  7 +-
 .../exec/vector/complex/fn/ExtendedType.java    |  1 +
 .../vector/complex/fn/ExtendedTypeName.java     |  1 +
 .../exec/vector/complex/fn/VectorOutput.java    | 81 +++++++++++++++++---
 .../complex/writer/TestExtendedTypes.java       | 26 +++++++
 .../vector/complex/mongo_extended.json          |  8 ++
 7 files changed, 115 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
index b17cf2f..32666fc 100644
--- a/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
+++ b/contrib/storage-mongo/src/test/java/org/apache/drill/exec/store/mongo/TestMongoProjectPushDown.java
@@ -17,15 +17,14 @@
  */
 package org.apache.drill.exec.store.mongo;
 
-import org.apache.drill.exec.ExecConstants;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import java.io.IOException;
-
 import static org.apache.drill.TestBuilder.listOf;
 import static org.apache.drill.TestBuilder.mapOf;
 
+import org.apache.drill.exec.ExecConstants;
+import org.junit.Ignore;
+import org.junit.Test;
+
+@Ignore("DRILL-3775")
 public class TestMongoProjectPushDown extends MongoTestBase {
 
   /**

http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
index dfc4f3a..8e78cf1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
@@ -155,8 +155,11 @@ public class JSONRecordReader extends AbstractRecordReader {
     if (columnNr > 0) {
       exceptionBuilder.pushContext("Column ", columnNr);
     }
-    exceptionBuilder.pushContext("Record ", currentRecordNumberInFile())
-            .pushContext("File ", hadoopPath.toUri().getPath());
+
+    if (hadoopPath != null) {
+      exceptionBuilder.pushContext("Record ", currentRecordNumberInFile())
+          .pushContext("File ", hadoopPath.toUri().getPath());
+    }
 
     throw exceptionBuilder.build(logger);
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedType.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedType.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedType.java
index bec0fd2..13df44f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedType.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedType.java
@@ -22,6 +22,7 @@ import com.fasterxml.jackson.core.io.SerializedString;
 public enum ExtendedType {
 
   BINARY(ExtendedTypeName.BINARY),
+  TYPE(ExtendedTypeName.TYPE),
   DATE(ExtendedTypeName.DATE),
   TIME(ExtendedTypeName.TIME),
   TIMESTAMP(ExtendedTypeName.TIMESTAMP),

http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedTypeName.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedTypeName.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedTypeName.java
index fcef24b..e432d56 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedTypeName.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/ExtendedTypeName.java
@@ -19,6 +19,7 @@ package org.apache.drill.exec.vector.complex.fn;
 
 public interface ExtendedTypeName {
   public static final String BINARY = "$binary";      // base64 encoded binary (ZHJpbGw=)  [from Mongo]
+  public static final String TYPE = "$type";          // type of binary data
   public static final String DATE = "$dateDay";       // ISO date with no time. such as (12-24-27)
   public static final String TIME = "$time";          // ISO time with no timezone (19:20:30.45Z)
   public static final String TIMESTAMP = "$date";     // ISO standard time (2009-02-23T00:00:00.000-08:00) [from Mongo]

http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java
index 651de3d..769f341 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/VectorOutput.java
@@ -81,7 +81,7 @@ abstract class VectorOutput {
       switch(possibleTypeName){
       case ExtendedTypeName.BINARY:
         writeBinary(checkNextToken(JsonToken.VALUE_STRING));
-        checkNextToken(JsonToken.END_OBJECT);
+        checkCurrentToken(JsonToken.END_OBJECT);
         return true;
       case ExtendedTypeName.DATE:
         writeDate(checkNextToken(JsonToken.VALUE_STRING));
@@ -92,7 +92,7 @@ abstract class VectorOutput {
         checkNextToken(JsonToken.END_OBJECT);
         return true;
       case ExtendedTypeName.TIMESTAMP:
-        writeTimestamp(checkNextToken(JsonToken.VALUE_STRING));
+        writeTimestamp(checkNextToken(JsonToken.VALUE_STRING, JsonToken.VALUE_NUMBER_INT));
         checkNextToken(JsonToken.END_OBJECT);
         return true;
       case ExtendedTypeName.INTERVAL:
@@ -100,7 +100,7 @@ abstract class VectorOutput {
         checkNextToken(JsonToken.END_OBJECT);
         return true;
       case ExtendedTypeName.INTEGER:
-        writeInteger(checkNextToken(JsonToken.VALUE_NUMBER_INT));
+        writeInteger(checkNextToken(JsonToken.VALUE_STRING, JsonToken.VALUE_NUMBER_INT));
         checkNextToken(JsonToken.END_OBJECT);
         return true;
       case ExtendedTypeName.DECIMAL:
@@ -116,8 +116,35 @@ abstract class VectorOutput {
     return checkNextToken(expected, expected);
   }
 
+  public boolean checkCurrentToken(final JsonToken expected) throws IOException{
+    return checkCurrentToken(expected, expected);
+  }
+
   public boolean checkNextToken(final JsonToken expected1, final JsonToken expected2) throws IOException{
-    JsonToken t = parser.nextToken();
+    return checkToken(parser.nextToken(), expected1, expected2);
+  }
+
+  public boolean checkCurrentToken(final JsonToken expected1, final JsonToken expected2) throws IOException{
+    return checkToken(parser.getCurrentToken(), expected1, expected2);
+  }
+
+  boolean hasType() throws JsonParseException, IOException {
+    JsonToken token = parser.nextToken();
+    return token == JsonToken.FIELD_NAME && parser.getText().equals(ExtendedTypeName.TYPE);
+  }
+
+  long getType() throws JsonParseException, IOException {
+    if (!checkNextToken(JsonToken.VALUE_NUMBER_INT, JsonToken.VALUE_STRING)) {
+      long type = parser.getValueAsLong();
+      //Advancing the token, as checking current token in binary
+      parser.nextToken();
+      return type;
+    }
+    throw new JsonParseException("Failure while reading $type value. Expected a NUMBER or STRING",
+        parser.getCurrentLocation());
+  }
+
+  public boolean checkToken(final JsonToken t, final JsonToken expected1, final JsonToken expected2) throws IOException{
     if(t == JsonToken.VALUE_NULL){
       return true;
     }else if(t == expected1){
@@ -154,7 +181,12 @@ abstract class VectorOutput {
     public void writeBinary(boolean isNull) throws IOException {
       VarBinaryWriter bin = writer.varBinary();
       if(!isNull){
-        work.prepareBinary(parser.getBinaryValue(), binary);
+        byte[] binaryData = parser.getBinaryValue();
+        if (hasType()) {
+          //Ignoring type info as of now.
+          getType();
+        }
+        work.prepareBinary(binaryData, binary);
         bin.write(binary);
       }
     }
@@ -181,8 +213,18 @@ abstract class VectorOutput {
     public void writeTimestamp(boolean isNull) throws IOException {
       TimeStampWriter ts = writer.timeStamp();
       if(!isNull){
-        DateTimeFormatter f = ISODateTimeFormat.dateTime();
-        ts.writeTimeStamp(DateTime.parse(parser.getValueAsString(), f).withZoneRetainFields(org.joda.time.DateTimeZone.UTC).getMillis());
+        switch (parser.getCurrentToken()) {
+        case VALUE_NUMBER_INT:
+          DateTime dt = new DateTime(parser.getLongValue(), org.joda.time.DateTimeZone.UTC);
+          ts.writeTimeStamp(dt.getMillis());
+          break;
+        case VALUE_STRING:
+          DateTimeFormatter f = ISODateTimeFormat.dateTime();
+          ts.writeTimeStamp(DateTime.parse(parser.getValueAsString(), f).withZoneRetainFields(org.joda.time.DateTimeZone.UTC).getMillis());
+          break;
+        default:
+          break;
+        }
       }
     }
 
@@ -202,7 +244,7 @@ abstract class VectorOutput {
     public void writeInteger(boolean isNull) throws IOException {
       BigIntWriter intWriter = writer.bigInt();
       if(!isNull){
-        intWriter.writeBigInt(parser.getLongValue());
+        intWriter.writeBigInt(Long.parseLong(parser.getValueAsString()));
       }
     }
 
@@ -232,7 +274,12 @@ abstract class VectorOutput {
     public void writeBinary(boolean isNull) throws IOException {
       VarBinaryWriter bin = writer.varBinary(fieldName);
       if(!isNull){
-        work.prepareBinary(parser.getBinaryValue(), binary);
+        byte[] binaryData = parser.getBinaryValue();
+        if (hasType()) {
+          //Ignoring type info as of now.
+          getType();
+        }
+        work.prepareBinary(binaryData, binary);
         bin.write(binary);
       }
     }
@@ -260,8 +307,18 @@ abstract class VectorOutput {
     public void writeTimestamp(boolean isNull) throws IOException {
       TimeStampWriter ts = writer.timeStamp(fieldName);
       if(!isNull){
-        DateTimeFormatter f = ISODateTimeFormat.dateTime();
-        ts.writeTimeStamp(DateTime.parse(parser.getValueAsString(), f).withZoneRetainFields(org.joda.time.DateTimeZone.UTC).getMillis());
+        switch (parser.getCurrentToken()) {
+        case VALUE_NUMBER_INT:
+          DateTime dt = new DateTime(parser.getLongValue(), org.joda.time.DateTimeZone.UTC);
+          ts.writeTimeStamp(dt.getMillis());
+          break;
+        case VALUE_STRING:
+          DateTimeFormatter f = ISODateTimeFormat.dateTime();
+          ts.writeTimeStamp(DateTime.parse(parser.getValueAsString(), f).withZoneRetainFields(org.joda.time.DateTimeZone.UTC).getMillis());
+          break;
+        default:
+          break;
+        }
       }
     }
 
@@ -281,7 +338,7 @@ abstract class VectorOutput {
     public void writeInteger(boolean isNull) throws IOException {
       BigIntWriter intWriter = writer.bigInt(fieldName);
       if(!isNull){
-        intWriter.writeBigInt(parser.getLongValue());
+        intWriter.writeBigInt(Long.parseLong(parser.getValueAsString()));
       }
     }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java
index f403108..51ecec5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java
@@ -64,4 +64,30 @@ public class TestExtendedTypes extends BaseTestQuery {
           ExecConstants.JSON_EXTENDED_TYPES.getDefault().getValue()));
     }
   }
+
+  @Test
+  public void testMongoExtendedTypes() throws Exception {
+
+    final String originalFile = "${WORKING_PATH}/src/test/resources/vector/complex/mongo_extended.json".replaceAll(
+        Pattern.quote("${WORKING_PATH}"),
+        Matcher.quoteReplacement(TestTools.getWorkingPath()));
+
+    try {
+      testNoResult(String.format("ALTER SESSION SET `%s` = 'json'", ExecConstants.OUTPUT_FORMAT_VALIDATOR.getOptionName()));
+      testNoResult(String.format("ALTER SESSION SET `%s` = true", ExecConstants.JSON_EXTENDED_TYPES.getOptionName()));
+
+      int actualRecordCount = testSql(String.format("select * from dfs.`%s`", originalFile));
+      assertEquals(
+          String.format(
+              "Received unexpected number of rows in output: expected=%d, received=%s",
+              1, actualRecordCount), 1, actualRecordCount);
+    } finally {
+      testNoResult(String.format("ALTER SESSION SET `%s` = '%s'",
+          ExecConstants.OUTPUT_FORMAT_VALIDATOR.getOptionName(),
+          ExecConstants.OUTPUT_FORMAT_VALIDATOR.getDefault().getValue()));
+      testNoResult(String.format("ALTER SESSION SET `%s` = %s",
+          ExecConstants.JSON_EXTENDED_TYPES.getOptionName(),
+          ExecConstants.JSON_EXTENDED_TYPES.getDefault().getValue()));
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/22148940/exec/java-exec/src/test/resources/vector/complex/mongo_extended.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/vector/complex/mongo_extended.json b/exec/java-exec/src/test/resources/vector/complex/mongo_extended.json
new file mode 100644
index 0000000..a38a83f
--- /dev/null
+++ b/exec/java-exec/src/test/resources/vector/complex/mongo_extended.json
@@ -0,0 +1,8 @@
+{
+  "drill_timestamp_millies" : {
+    "$date" : 1436241583488
+  },
+  "bin" : {
+      "$binary" : "ZHJpbGw=", "$type" : 1
+  }
+}