You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by js...@apache.org on 2015/05/07 20:02:38 UTC
[6/8] drill git commit: DRILL-1460: Implement "read_numbers_as_double" option for JSON reader
DRILL-1460: Implement "read_numbers_as_double" option for JSON reader
Conflicts:
contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
Conflicts:
exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/4fa02c4c
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/4fa02c4c
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/4fa02c4c
Branch: refs/heads/master
Commit: 4fa02c4c50f6a0a9d9b1b62637a9c8de04eb410f
Parents: 6b98db3
Author: AdamPD <ad...@pharmadata.net.au>
Authored: Mon Feb 9 17:14:31 2015 +1000
Committer: Jason Altekruse <al...@gmail.com>
Committed: Thu May 7 10:06:23 2015 -0700
----------------------------------------------------------------------
.../exec/store/mongo/MongoRecordReader.java | 4 +-
.../org/apache/drill/exec/ExecConstants.java | 5 +++
.../exec/expr/fn/impl/conv/JsonConvertFrom.java | 4 +-
.../server/options/SystemOptionManager.java | 2 +
.../exec/store/easy/json/JSONRecordReader.java | 6 ++-
.../exec/vector/complex/fn/JsonReader.java | 22 +++++++---
.../exec/store/json/TestJsonRecordReader.java | 44 ++++++++++++++++++++
.../resources/jsoninput/mixed_number_types.json | 2 +
8 files changed, 79 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
----------------------------------------------------------------------
diff --git a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
index 53c576e..182f5a4 100644
--- a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
+++ b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java
@@ -73,6 +73,7 @@ public class MongoRecordReader extends AbstractRecordReader {
private OperatorContext operatorContext;
private Boolean enableAllTextMode;
+ private Boolean readNumbersAsDouble;
public MongoRecordReader(MongoSubScan.MongoSubScanSpec subScanSpec,
List<SchemaPath> projectedColumns, FragmentContext context,
@@ -89,6 +90,7 @@ public class MongoRecordReader extends AbstractRecordReader {
subScanSpec.getMinFilters(), subScanSpec.getMaxFilters());
buildFilters(subScanSpec.getFilter(), mergedFilters);
enableAllTextMode = fragmentContext.getOptions().getOption(ExecConstants.MONGO_ALL_TEXT_MODE).bool_val;
+ readNumbersAsDouble = fragmentContext.getOptions().getOption(ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE).bool_val;
init(subScanSpec);
}
@@ -150,7 +152,7 @@ public class MongoRecordReader extends AbstractRecordReader {
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
this.operatorContext = context;
this.writer = new VectorContainerWriter(output);
- this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), Lists.newArrayList(getColumns()), enableAllTextMode, false);
+ this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), Lists.newArrayList(getColumns()), enableAllTextMode, false, readNumbersAsDouble);
logger.info("Filters Applied : " + filters);
logger.info("Fields Selected :" + fields);
cursor = collection.find(filters, fields);
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 1591079..be8c7a0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -125,8 +125,13 @@ public interface ExecConstants {
public static final String FILESYSTEM_PARTITION_COLUMN_LABEL = "drill.exec.storage.file.partition.column.label";
public static final OptionValidator FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR = new StringValidator(FILESYSTEM_PARTITION_COLUMN_LABEL, "dir");
+ public static String JSON_READ_NUMBERS_AS_DOUBLE = "store.json.read_numbers_as_double";
+ public static OptionValidator JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR = new BooleanValidator(JSON_READ_NUMBERS_AS_DOUBLE, false);
+
public static String MONGO_ALL_TEXT_MODE = "store.mongo.all_text_mode";
public static OptionValidator MONGO_READER_ALL_TEXT_MODE_VALIDATOR = new BooleanValidator(MONGO_ALL_TEXT_MODE, false);
+ public static String MONGO_READER_READ_NUMBERS_AS_DOUBLE = "store.mongo.read_numbers_as_double";
+ public static OptionValidator MONGO_READER_READ_NUMBERS_AS_DOUBLE_VALIDATOR = new BooleanValidator(MONGO_READER_READ_NUMBERS_AS_DOUBLE, false);
public static final String SLICE_TARGET = "planner.slice_target";
public static final long SLICE_TARGET_DEFAULT = 100000l;
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
index 3be2c9d..bea6e0f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
@@ -50,7 +50,7 @@ public class JsonConvertFrom {
@Output ComplexWriter writer;
public void setup(){
- jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false);
+ jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false, true /* read numbers as doubles */);
}
public void eval(){
@@ -76,7 +76,7 @@ public class JsonConvertFrom {
@Output ComplexWriter writer;
public void setup(){
- jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false);
+ jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false, true /* read numbers as doubles */);
}
public void eval(){
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 5dff828..07ad328 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -80,8 +80,10 @@ public class SystemOptionManager extends BaseOptionManager {
ExecConstants.PARQUET_RECORD_READER_IMPLEMENTATION_VALIDATOR,
ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR,
ExecConstants.JSON_EXTENDED_TYPES,
+ ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR,
ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL_VALIDATOR,
ExecConstants.MONGO_READER_ALL_TEXT_MODE_VALIDATOR,
+ ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE_VALIDATOR,
ExecConstants.SLICE_TARGET_OPTION,
ExecConstants.AFFINITY_FACTOR,
ExecConstants.MAX_WIDTH_GLOBAL,
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
index 554c633..3d789eb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
@@ -58,6 +58,7 @@ public class JSONRecordReader extends AbstractRecordReader {
private final FragmentContext fragmentContext;
private OperatorContext operatorContext;
private final boolean enableAllTextMode;
+ private final boolean readNumbersAsDouble;
/**
* Create a JSON Record Reader that uses a file based input stream.
@@ -75,7 +76,7 @@ public class JSONRecordReader extends AbstractRecordReader {
/**
* Create a new JSON Record Reader that uses a in memory materialized JSON stream.
* @param fragmentContext
- * @param inputPath
+ * @param embeddedContent
* @param fileSystem
* @param columns
* @throws OutOfMemoryException
@@ -105,6 +106,7 @@ public class JSONRecordReader extends AbstractRecordReader {
// only enable all text mode if we aren't using embedded content mode.
this.enableAllTextMode = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR);
+ this.readNumbersAsDouble = fragmentContext.getOptions().getOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE).bool_val;
setColumns(columns);
}
@@ -120,7 +122,7 @@ public class JSONRecordReader extends AbstractRecordReader {
if (isSkipQuery()) {
this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer());
} else {
- this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), ImmutableList.copyOf(getColumns()), enableAllTextMode, true);
+ this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), ImmutableList.copyOf(getColumns()), enableAllTextMode, true, readNumbersAsDouble);
}
setupParser();
}catch(final Exception e){
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
index 696fe7a..095d8c6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
@@ -55,6 +55,7 @@ public class JsonReader extends BaseJsonProcessor {
private final MapVectorOutput mapOutput;
private final ListVectorOutput listOutput;
private final boolean extended = true;
+ private final boolean readNumbersAsDouble;
/**
* Describes whether or not this reader can unwrap a single root array record and treat it like a set of distinct records.
@@ -72,11 +73,11 @@ public class JsonReader extends BaseJsonProcessor {
private FieldSelection selection;
- public JsonReader(DrillBuf managedBuf, boolean allTextMode, boolean skipOuterList) {
- this(managedBuf, GroupScan.ALL_COLUMNS, allTextMode, skipOuterList);
+ public JsonReader(DrillBuf managedBuf, boolean allTextMode, boolean skipOuterList, boolean readNumbersAsDouble) {
+ this(managedBuf, GroupScan.ALL_COLUMNS, allTextMode, skipOuterList, readNumbersAsDouble);
}
- public JsonReader(DrillBuf managedBuf, List<SchemaPath> columns, boolean allTextMode, boolean skipOuterList) {
+ public JsonReader(DrillBuf managedBuf, List<SchemaPath> columns, boolean allTextMode, boolean skipOuterList, boolean readNumbersAsDouble) {
super(managedBuf);
assert Preconditions.checkNotNull(columns).size() > 0 : "json record reader requires at least a column";
this.selection = FieldSelection.getFieldSelection(columns);
@@ -87,6 +88,7 @@ public class JsonReader extends BaseJsonProcessor {
this.mapOutput = new MapVectorOutput(workingBuffer);
this.listOutput = new ListVectorOutput(workingBuffer);
this.currentFieldName="<none>";
+ this.readNumbersAsDouble = readNumbersAsDouble;
}
@Override
@@ -330,7 +332,12 @@ public class JsonReader extends BaseJsonProcessor {
atLeastOneWrite = true;
break;
case VALUE_NUMBER_INT:
- map.bigInt(fieldName).writeBigInt(parser.getLongValue());
+ if (this.readNumbersAsDouble) {
+ map.float8(fieldName).writeFloat8(parser.getDoubleValue());
+ }
+ else {
+ map.bigInt(fieldName).writeBigInt(parser.getLongValue());
+ }
atLeastOneWrite = true;
break;
case VALUE_STRING:
@@ -496,7 +503,12 @@ public class JsonReader extends BaseJsonProcessor {
atLeastOneWrite = true;
break;
case VALUE_NUMBER_INT:
- list.bigInt().writeBigInt(parser.getLongValue());
+ if (this.readNumbersAsDouble) {
+ list.float8().writeFloat8(parser.getDoubleValue());
+ }
+ else {
+ list.bigInt().writeBigInt(parser.getLongValue());
+ }
atLeastOneWrite = true;
break;
case VALUE_STRING:
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
index 27631c3..bb1af9e 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
@@ -24,6 +24,7 @@ import org.junit.Test;
import org.junit.Assert;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
public class TestJsonRecordReader extends BaseTestQuery{
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestJsonRecordReader.class);
@@ -110,4 +111,47 @@ public class TestJsonRecordReader extends BaseTestQuery{
.go();
}
+ @Test
+ public void testMixedNumberTypes() throws Exception {
+ try {
+ testBuilder()
+ .sqlQuery("select * from cp.`jsoninput/mixed_number_types.json`")
+ .unOrdered()
+ .jsonBaselineFile("jsoninput/mixed_number_types.json")
+ .build().run();
+ } catch (Exception ex) {
+ assertTrue(ex.getMessage().contains("DATA_READ ERROR: Error parsing JSON - You tried to write a BigInt type when you are using a ValueWriter of type NullableFloat8WriterImpl."));
+ // this indicates successful completion of the test
+ return;
+ }
+ throw new Exception("Mixed number types verification failed, expected failure on conflicting number types.");
+ }
+
+ @Test
+ public void testMixedNumberTypesInAllTextMode() throws Exception {
+ testNoResult("alter session set `store.json.all_text_mode`= true");
+ testBuilder()
+ .sqlQuery("select * from cp.`jsoninput/mixed_number_types.json`")
+ .unOrdered()
+ .baselineColumns("a")
+ .baselineValues("5.2")
+ .baselineValues("6")
+ .build().run();
+ }
+
+ @Test
+ public void testMixedNumberTypesWhenReadingNumbersAsDouble() throws Exception {
+ try {
+ testNoResult("alter session set `store.json.read_numbers_as_double`= true");
+ testBuilder()
+ .sqlQuery("select * from cp.`jsoninput/mixed_number_types.json`")
+ .unOrdered()
+ .baselineColumns("a")
+ .baselineValues(5.2D)
+ .baselineValues(6D)
+ .build().run();
+ } finally {
+ testNoResult("alter session set `store.json.read_numbers_as_double`= false");
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4fa02c4c/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json b/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json
new file mode 100644
index 0000000..699ad0b
--- /dev/null
+++ b/exec/java-exec/src/test/resources/jsoninput/mixed_number_types.json
@@ -0,0 +1,2 @@
+{ "a": 5.2 }
+{ "a": 6 }
\ No newline at end of file