You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pa...@apache.org on 2015/07/27 23:30:20 UTC
drill git commit: DRILL-3537: When scanning files in ScanBatch,
ignore all the empty files before reaching a non-empty file
Repository: drill
Updated Branches:
refs/heads/master 1b69869d9 -> a219f8784
DRILL-3537: When scanning files in ScanBatch, ignore all the empty files before reaching a non-empty file
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a219f878
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a219f878
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a219f878
Branch: refs/heads/master
Commit: a219f8784c55ce3bc15b9bb3a19d7b33e4021c00
Parents: 1b69869
Author: Hsuan-Yi Chu <hs...@usc.edu>
Authored: Thu Jul 23 17:20:17 2015 -0700
Committer: Parth Chandra <pa...@apache.org>
Committed: Mon Jul 27 14:23:33 2015 -0700
----------------------------------------------------------------------
.../apache/drill/exec/physical/impl/ScanBatch.java | 12 +++++++++++-
.../exec/vector/complex/writer/TestJsonReader.java | 16 ++++++++++++++++
.../store/json/jsonDirectoryWithEmpyFile/a.json | 0
.../store/json/jsonDirectoryWithEmpyFile/b.json | 3 +++
4 files changed, 30 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
index 6bf1280..4b91e1f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
@@ -83,7 +83,7 @@ public class ScanBatch implements CloseableRecordBatch {
private String partitionColumnDesignator;
private boolean done = false;
private SchemaChangeCallBack callBack = new SchemaChangeCallBack();
-
+ private boolean hasReadNonEmptyFile = false;
public ScanBatch(PhysicalOperator subScanConfig, FragmentContext context, OperatorContext oContext,
Iterator<RecordReader> readers, List<String[]> partitionColumns, List<Integer> selectedPartitionColumns) throws ExecutionSetupException {
this.context = context;
@@ -186,6 +186,15 @@ public class ScanBatch implements CloseableRecordBatch {
return IterOutcome.NONE;
}
+ // If all the files we have read so far are just empty, the schema is not useful
+ if(!hasReadNonEmptyFile) {
+ container.clear();
+ for (ValueVector v : fieldVectorMap.values()) {
+ v.clear();
+ }
+ fieldVectorMap.clear();
+ }
+
currentReader.cleanup();
currentReader = readers.next();
partitionValues = partitionColumns.hasNext() ? partitionColumns.next() : null;
@@ -208,6 +217,7 @@ public class ScanBatch implements CloseableRecordBatch {
}
}
+ hasReadNonEmptyFile = true;
populatePartitionVectors();
// this is a slight misuse of this metric but it will allow Readers to report how many records they generated.
http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
index 912a5f0..7d6c71c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
@@ -296,6 +296,22 @@ public class TestJsonReader extends BaseTestQuery {
batchLoader.clear();
}
+ @Test
+ public void testJsonDirectoryWithEmptyFile() throws Exception {
+ String root = FileUtils.getResourceAsFile("/store/json/jsonDirectoryWithEmpyFile").toURI().toString();
+
+ String queryRightEmpty = String.format(
+ "select * from dfs_test.`%s`", root);
+
+ testBuilder()
+ .sqlQuery(queryRightEmpty)
+ .unOrdered()
+ .baselineColumns("a")
+ .baselineValues(1l)
+ .build()
+ .run();
+ }
+
private void testExistentColumns(RecordBatchLoader batchLoader) throws SchemaChangeException {
VectorWrapper<?> vw = batchLoader.getValueAccessorById(
RepeatedBigIntVector.class, //
http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
new file mode 100644
index 0000000..73d0e9c
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
@@ -0,0 +1,3 @@
+{
+ a : 1
+}
\ No newline at end of file