You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by sm...@apache.org on 2015/12/02 11:12:56 UTC
drill git commit: DRILL-4108: Handle non existing cols for query w extractHeader
Repository: drill
Updated Branches:
refs/heads/master e3f331a28 -> 36bd63786
DRILL-4108: Handle non existing cols for query w extractHeader
Closes #269
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/36bd6378
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/36bd6378
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/36bd6378
Branch: refs/heads/master
Commit: 36bd6378614e174d6302f70da47a075d1acbb59d
Parents: e3f331a
Author: Abhi P <ab...@dremio.com>
Authored: Tue Nov 17 18:14:18 2015 -0800
Committer: Steven Phillips <sm...@apache.org>
Committed: Wed Dec 2 02:11:25 2015 -0800
----------------------------------------------------------------------
.../compliant/CompliantTextRecordReader.java | 3 ++
.../easy/text/compliant/FieldVarCharOutput.java | 27 ++++++++----
.../drill/exec/store/text/TestCsvHeader.java | 43 ++++++++++++++++++++
.../resources/bootstrap-storage-plugins.json | 7 ++++
.../resources/store/text/data/cars.csvh-test | 6 +++
.../resources/store/text/data/d2/cars1.csvh | 6 +++
.../resources/store/text/data/d2/cars2.csvh | 5 +++
pom.xml | 1 +
8 files changed, 91 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index f0363da..f6dab89 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -146,6 +146,9 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
assert (settings.isHeaderExtractionEnabled());
assert (oContext != null);
+ // don't skip header in case skipFirstLine is set true
+ settings.setSkipFirstLine(false);
+
// setup Output using OutputMutator
// we should use a separate output mutator to avoid reshaping query output with header data
HeaderOutputMutator hOutputMutator = new HeaderOutputMutator();
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
index f86d40f..494c593 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
@@ -76,11 +76,11 @@ class FieldVarCharOutput extends TextOutput {
public FieldVarCharOutput(OutputMutator outputMutator, String [] fieldNames, Collection<SchemaPath> columns, boolean isStarQuery) throws SchemaChangeException {
int totalFields = fieldNames.length;
- this.selectedFields = new boolean[totalFields];
- this.vectors = new VarCharVector[totalFields];
+ List<String> outputColumns = new ArrayList<>(Arrays.asList(fieldNames));
if (isStarQuery) {
maxField = totalFields - 1;
+ this.selectedFields = new boolean[totalFields];
Arrays.fill(selectedFields, true);
} else {
List<Integer> columnIds = new ArrayList<Integer>();
@@ -90,25 +90,38 @@ class FieldVarCharOutput extends TextOutput {
for (SchemaPath path : columns) {
pathStr = path.getRootSegment().getPath();
if (pathStr.equals(COL_NAME) && path.getRootSegment().getChild() != null) {
- //TODO: support both field names and columns index in predicate pushdown
- index = path.getRootSegment().getChild().getArraySegment().getIndex();
+ //TODO: support both field names and columns index along with predicate pushdown
+ throw UserException
+ .unsupportedError()
+ .message("With extractHeader enabled, only header names are supported")
+ .addContext("column name", pathStr)
+ .addContext("column index", path.getRootSegment().getChild())
+ .build(logger);
} else {
- index = Arrays.asList(fieldNames).indexOf(pathStr);
+ index = outputColumns.indexOf(pathStr);
+ if (index < 0) {
+ // found col that is not a part of fieldNames, add it
+ // this col might be part of some another scanner
+ index = totalFields++;
+ outputColumns.add(pathStr);
+ }
}
- assert index >= 0 && index < totalFields : "Invalid column index encountered";
columnIds.add(index);
}
Collections.sort(columnIds);
+ this.selectedFields = new boolean[totalFields];
for(Integer i : columnIds) {
selectedFields[i] = true;
maxField = i;
}
}
+ this.vectors = new VarCharVector[totalFields];
+
for (int i = 0; i <= maxField; i++) {
if (selectedFields[i]) {
- MaterializedField field = MaterializedField.create(fieldNames[i], Types.required(TypeProtos.MinorType.VARCHAR));
+ MaterializedField field = MaterializedField.create(outputColumns.get(i), Types.required(TypeProtos.MinorType.VARCHAR));
this.vectors[i] = outputMutator.addField(field, VarCharVector.class);
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
index 5dcc9a8..92aa02b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
@@ -119,6 +119,49 @@ public class TestCsvHeader extends BaseTestQuery{
validateResults (batches, expectedOutput);
}
+ @Test //DRILL-4108
+ public void testCsvHeaderNonExistingColumn() throws Exception {
+ String query = String.format("select `Year`, Model, Category from dfs_test.`%s` where Make = 'Chevy'", root);
+ List<QueryDataBatch> batches = testSqlWithResults(query);
+
+ String expectedOutput = "Year|Model|Category\n" +
+ "1999|Venture \"Extended Edition\"|\n" +
+ "1999|Venture \"Extended Edition, Very Large\"|\n";
+
+ validateResults (batches, expectedOutput);
+ }
+
+ @Test //DRILL-4108
+ public void testCsvHeaderMismatch() throws Exception {
+ String ddir = FileUtils.getResourceAsFile("/store/text/data/d2").toURI().toString();
+ String query = String.format("select `Year`, Model, Category from dfs_test.`%s` where Make = 'Chevy'", ddir);
+ List<QueryDataBatch> batches = testSqlWithResults(query);
+ // double header is unique to this test framework, doesn't happen with sqlline
+ String expectedOutput = "Year|Model|Category\n" +
+ "1999|Venture \"Extended Edition\"|\n" +
+ "1999|Venture \"Extended Edition, Very Large\"|\n" +
+ "Year|Model|Category\n" +
+ "1999||Venture \"Extended Edition\"\n" +
+ "1999||Venture \"Extended Edition, Very Large\"\n";
+
+ validateResults (batches, expectedOutput);
+ }
+
+ @Test //DRILL-4108
+ public void testCsvHeaderSkipFirstLine() throws Exception {
+ // test that header is not skipped when skipFirstLine is true
+ // testing by defining new format plugin with skipFirstLine set to true and diff file extension
+ String dfile = FileUtils.getResourceAsFile("/store/text/data/cars.csvh-test").toURI().toString();
+ String query = String.format("select `Year`, Model from dfs_test.`%s` where Make = 'Chevy'", dfile);
+ List<QueryDataBatch> batches = testSqlWithResults(query);
+
+ String expectedOutput = "Year|Model\n" +
+ "1999|Venture \"Extended Edition\"\n" +
+ "1999|Venture \"Extended Edition, Very Large\"\n";
+
+ validateResults (batches, expectedOutput);
+ }
+
private void validateResults (List<QueryDataBatch> batches, String expectedOutput) throws SchemaChangeException {
String actualOutput = getResultString(batches, OUTPUT_DELIMITER);
//for your and machine's eyes
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
index 76f3116..ec840a6 100644
--- a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
+++ b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
@@ -57,6 +57,13 @@
extensions: [ "csvh" ],
delimiter: ",",
extractHeader: true
+ },
+ "csvh-test" : {
+ type: "text",
+ extensions: [ "csvh-test" ],
+ delimiter: ",",
+ extractHeader: true,
+ skipFirstLine: true
}
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test b/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test
new file mode 100644
index 0000000..37b82a0
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test
@@ -0,0 +1,6 @@
+Year,Make,Model,Description,Price
+1997,Ford,E350,"ac, abs, moon",3000.00
+1999,Chevy,"Venture ""Extended Edition""","",4900.00
+1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
+1996,Jeep,Grand Cherokee,"MUST SELL!
+air, moon roof, loaded",4799.00
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh b/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh
new file mode 100644
index 0000000..37b82a0
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh
@@ -0,0 +1,6 @@
+Year,Make,Model,Description,Price
+1997,Ford,E350,"ac, abs, moon",3000.00
+1999,Chevy,"Venture ""Extended Edition""","",4900.00
+1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
+1996,Jeep,Grand Cherokee,"MUST SELL!
+air, moon roof, loaded",4799.00
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh b/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh
new file mode 100644
index 0000000..4d6d947
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh
@@ -0,0 +1,5 @@
+Year,Make,Category,Description,Price
+1997,Ford,E350,"ac, abs, moon",3000.00
+1999,Chevy,"Venture ""Extended Edition""","",4900.00
+1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
+1996,Jeep,Grand Cherokee,"MUST SELL! air, moon roof, loaded",4799.00
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index b2f2507..9876351 100644
--- a/pom.xml
+++ b/pom.xml
@@ -183,6 +183,7 @@
<exclude>**/git.properties</exclude>
<exclude>**/*.csv</exclude>
<exclude>**/*.csvh</exclude>
+ <exclude>**/*.csvh-test</exclude>
<exclude>**/*.tsv</exclude>
<exclude>**/*.txt</exclude>
<exclude>**/*.ssv</exclude>