You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by js...@apache.org on 2015/05/07 20:02:38 UTC

[6/8] drill git commit: DRILL-1460: Implement "read_numbers_as_double" option for JSON reader

DRILL-1460: Implement "read_numbers_as_double" option for JSON reader

Conflicts:
	contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
	exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
	exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
	exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
	exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
	exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java

Conflicts:
	exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
	exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/4fa02c4c
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/4fa02c4c
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/4fa02c4c

Branch: refs/heads/master
Commit: 4fa02c4c50f6a0a9d9b1b62637a9c8de04eb410f
Parents: 6b98db3
Author: AdamPD <ad...@pharmadata.net.au>
Authored: Mon Feb 9 17:14:31 2015 +1000
Committer: Jason Altekruse <al...@gmail.com>
Committed: Thu May 7 10:06:23 2015 -0700

----------------------------------------------------------------------
 .../exec/store/mongo/MongoRecordReader.java     |  4 +-
 .../org/apache/drill/exec/ExecConstants.java    |  5 +++
 .../exec/expr/fn/impl/conv/JsonConvertFrom.java |  4 +-
 .../server/options/SystemOptionManager.java     |  2 +
 .../exec/store/easy/json/JSONRecordReader.java  |  6 ++-
 .../exec/vector/complex/fn/JsonReader.java      | 22 +++++++---
 .../exec/store/json/TestJsonRecordReader.java   | 44 ++++++++++++++++++++
 .../resources/jsoninput/mixed_number_types.json |  2 +
 8 files changed, 79 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
index 53c576e..182f5a4 100644
--- a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
+++ b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
@@ -73,6 +73,7 @@ public class MongoRecordReader extends AbstractRecordReader {
   private OperatorContext operatorContext;
 
   private Boolean enableAllTextMode;
+  private Boolean readNumbersAsDouble;
 
   public MongoRecordReader(MongoSubScan.MongoSubScanSpec subScanSpec,
       List<SchemaPath> projectedColumns, FragmentContext context,
@@ -89,6 +90,7 @@ public class MongoRecordReader extends AbstractRecordReader {
         subScanSpec.getMinFilters(), subScanSpec.getMaxFilters());
     buildFilters(subScanSpec.getFilter(), mergedFilters);
     enableAllTextMode = fragmentContext.getOptions().getOption(ExecConstants.MONGO_ALL_TEXT_MODE).bool_val;
+    readNumbersAsDouble = fragmentContext.getOptions().getOption(ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE).bool_val;
     init(subScanSpec);
   }
 
@@ -150,7 +152,7 @@ public class MongoRecordReader extends AbstractRecordReader {
   public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
     this.operatorContext = context;
     this.writer = new VectorContainerWriter(output);
-    this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), Lists.newArrayList(getColumns()), enableAllTextMode, false);
+    this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), Lists.newArrayList(getColumns()), enableAllTextMode, false, readNumbersAsDouble);
     logger.info("Filters Applied : " + filters);
     logger.info("Fields Selected :" + fields);
     cursor = collection.find(filters, fields);

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 1591079..be8c7a0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -125,8 +125,13 @@ public interface ExecConstants {
   public static final String FILESYSTEM_PARTITION_COLUMN_LABEL = "drill.exec.storage.file.partition.column.label";
   public static final OptionValidator FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR = new StringValidator(FILESYSTEM_PARTITION_COLUMN_LABEL, "dir");
 
+  public static String JSON_READ_NUMBERS_AS_DOUBLE = "store.json.read_numbers_as_double";
+  public static OptionValidator JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR = new BooleanValidator(JSON_READ_NUMBERS_AS_DOUBLE, false);
+
   public static String MONGO_ALL_TEXT_MODE = "store.mongo.all_text_mode";
   public static OptionValidator MONGO_READER_ALL_TEXT_MODE_VALIDATOR = new BooleanValidator(MONGO_ALL_TEXT_MODE, false);
+  public static String MONGO_READER_READ_NUMBERS_AS_DOUBLE = "store.mongo.read_numbers_as_double";
+  public static OptionValidator MONGO_READER_READ_NUMBERS_AS_DOUBLE_VALIDATOR = new BooleanValidator(MONGO_READER_READ_NUMBERS_AS_DOUBLE, false);
 
   public static final String SLICE_TARGET = "planner.slice_target";
   public static final long SLICE_TARGET_DEFAULT = 100000l;

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
index 3be2c9d..bea6e0f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
@@ -50,7 +50,7 @@ public class JsonConvertFrom {
     @Output ComplexWriter writer;
 
     public void setup(){
-      jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false);
+      jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false, true /* read numbers as doubles */);
     }
 
     public void eval(){
@@ -76,7 +76,7 @@ public class JsonConvertFrom {
     @Output ComplexWriter writer;
 
     public void setup(){
-      jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false);
+      jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false, true /* read numbers as doubles */);
     }
 
     public void eval(){

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 5dff828..07ad328 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -80,8 +80,10 @@ public class SystemOptionManager extends BaseOptionManager {
       ExecConstants.PARQUET_RECORD_READER_IMPLEMENTATION_VALIDATOR,
       ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR,
       ExecConstants.JSON_EXTENDED_TYPES,
+      ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR,
       ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR,
       ExecConstants.MONGO_READER_ALL_TEXT_MODE_VALIDATOR,
+      ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE_VALIDATOR,
       ExecConstants.SLICE_TARGET_OPTION,
       ExecConstants.AFFINITY_FACTOR,
       ExecConstants.MAX_WIDTH_GLOBAL,

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
index 554c633..3d789eb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
@@ -58,6 +58,7 @@ public class JSONRecordReader extends AbstractRecordReader {
   private final FragmentContext fragmentContext;
   private OperatorContext operatorContext;
   private final boolean enableAllTextMode;
+  private final boolean readNumbersAsDouble;
 
   /**
    * Create a JSON Record Reader that uses a file based input stream.
@@ -75,7 +76,7 @@ public class JSONRecordReader extends AbstractRecordReader {
   /**
    * Create a new JSON Record Reader that uses a in memory materialized JSON stream.
    * @param fragmentContext
-   * @param inputPath
+   * @param embeddedContent
    * @param fileSystem
    * @param columns
    * @throws OutOfMemoryException
@@ -105,6 +106,7 @@ public class JSONRecordReader extends AbstractRecordReader {
 
     // only enable all text mode if we aren't using embedded content mode.
     this.enableAllTextMode = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR);
+    this.readNumbersAsDouble = fragmentContext.getOptions().getOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE).bool_val;
     setColumns(columns);
   }
 
@@ -120,7 +122,7 @@ public class JSONRecordReader extends AbstractRecordReader {
       if (isSkipQuery()) {
         this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer());
       } else {
-        this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), ImmutableList.copyOf(getColumns()), enableAllTextMode, true);
+        this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), ImmutableList.copyOf(getColumns()), enableAllTextMode, true, readNumbersAsDouble);
       }
       setupParser();
     }catch(final Exception e){

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
index 696fe7a..095d8c6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
@@ -55,6 +55,7 @@ public class JsonReader extends BaseJsonProcessor {
   private final MapVectorOutput mapOutput;
   private final ListVectorOutput listOutput;
   private final boolean extended = true;
+  private final boolean readNumbersAsDouble;
 
   /**
    * Describes whether or not this reader can unwrap a single root array record and treat it like a set of distinct records.
@@ -72,11 +73,11 @@ public class JsonReader extends BaseJsonProcessor {
 
   private FieldSelection selection;
 
-  public JsonReader(DrillBuf managedBuf, boolean allTextMode, boolean skipOuterList) {
-    this(managedBuf, GroupScan.ALL_COLUMNS, allTextMode, skipOuterList);
+  public JsonReader(DrillBuf managedBuf, boolean allTextMode, boolean skipOuterList, boolean readNumbersAsDouble) {
+    this(managedBuf, GroupScan.ALL_COLUMNS, allTextMode, skipOuterList, readNumbersAsDouble);
   }
 
-  public JsonReader(DrillBuf managedBuf, List<SchemaPath> columns, boolean allTextMode, boolean skipOuterList) {
+  public JsonReader(DrillBuf managedBuf, List<SchemaPath> columns, boolean allTextMode, boolean skipOuterList, boolean readNumbersAsDouble) {
     super(managedBuf);
     assert Preconditions.checkNotNull(columns).size() > 0 : "json record reader requires at least a column";
     this.selection = FieldSelection.getFieldSelection(columns);
@@ -87,6 +88,7 @@ public class JsonReader extends BaseJsonProcessor {
     this.mapOutput = new MapVectorOutput(workingBuffer);
     this.listOutput = new ListVectorOutput(workingBuffer);
     this.currentFieldName="<none>";
+    this.readNumbersAsDouble = readNumbersAsDouble;
   }
 
   @Override
@@ -330,7 +332,12 @@ public class JsonReader extends BaseJsonProcessor {
         atLeastOneWrite = true;
         break;
       case VALUE_NUMBER_INT:
-        map.bigInt(fieldName).writeBigInt(parser.getLongValue());
+        if (this.readNumbersAsDouble) {
+          map.float8(fieldName).writeFloat8(parser.getDoubleValue());
+        }
+        else {
+          map.bigInt(fieldName).writeBigInt(parser.getLongValue());
+        }
         atLeastOneWrite = true;
         break;
       case VALUE_STRING:
@@ -496,7 +503,12 @@ public class JsonReader extends BaseJsonProcessor {
         atLeastOneWrite = true;
         break;
       case VALUE_NUMBER_INT:
-        list.bigInt().writeBigInt(parser.getLongValue());
+        if (this.readNumbersAsDouble) {
+          list.float8().writeFloat8(parser.getDoubleValue());
+        }
+        else {
+          list.bigInt().writeBigInt(parser.getLongValue());
+        }
         atLeastOneWrite = true;
         break;
       case VALUE_STRING:

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
index 27631c3..bb1af9e 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
@@ -24,6 +24,7 @@ import org.junit.Test;
 import org.junit.Assert;
 import static org.junit.Assert.assertEquals;
 
+import static org.junit.Assert.assertTrue;
 
 public class TestJsonRecordReader extends BaseTestQuery{
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestJsonRecordReader.class);
@@ -110,4 +111,47 @@ public class TestJsonRecordReader extends BaseTestQuery{
             .go();
   }
 
+  @Test
+  public void testMixedNumberTypes() throws Exception {
+    try {
+      testBuilder()
+          .sqlQuery("select * from cp.`jsoninput/mixed_number_types.json`")
+          .unOrdered()
+          .jsonBaselineFile("jsoninput/mixed_number_types.json")
+          .build().run();
+    } catch (Exception ex) {
+      assertTrue(ex.getMessage().contains("DATA_READ ERROR: Error parsing JSON - You tried to write a BigInt type when you are using a ValueWriter of type NullableFloat8WriterImpl."));
+      // this indicates successful completion of the test
+      return;
+    }
+    throw new Exception("Mixed number types verification failed, expected failure on conflicting number types.");
+  }
+
+  @Test
+  public void testMixedNumberTypesInAllTextMode() throws Exception {
+    testNoResult("alter session set `store.json.all_text_mode`= true");
+    testBuilder()
+        .sqlQuery("select * from cp.`jsoninput/mixed_number_types.json`")
+        .unOrdered()
+        .baselineColumns("a")
+        .baselineValues("5.2")
+        .baselineValues("6")
+        .build().run();
+  }
+
+  @Test
+  public void testMixedNumberTypesWhenReadingNumbersAsDouble() throws Exception {
+    try {
+    testNoResult("alter session set `store.json.read_numbers_as_double`= true");
+    testBuilder()
+        .sqlQuery("select * from cp.`jsoninput/mixed_number_types.json`")
+        .unOrdered()
+        .baselineColumns("a")
+        .baselineValues(5.2D)
+        .baselineValues(6D)
+        .build().run();
+    } finally {
+      testNoResult("alter session set `store.json.read_numbers_as_double`= false");
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json b/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json
new file mode 100644
index 0000000..699ad0b
--- /dev/null
+++ b/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json
@@ -0,0 +1,2 @@
+{ "a": 5.2 }
+{ "a": 6 }
\ No newline at end of file