You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by sm...@apache.org on 2015/12/02 11:12:56 UTC

drill git commit: DRILL-4108: Handle non existing cols for query w extractHeader

Repository: drill
Updated Branches:
  refs/heads/master e3f331a28 -> 36bd63786


DRILL-4108: Handle non existing cols for query w extractHeader

Closes #269


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/36bd6378
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/36bd6378
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/36bd6378

Branch: refs/heads/master
Commit: 36bd6378614e174d6302f70da47a075d1acbb59d
Parents: e3f331a
Author: Abhi P <ab...@dremio.com>
Authored: Tue Nov 17 18:14:18 2015 -0800
Committer: Steven Phillips <sm...@apache.org>
Committed: Wed Dec 2 02:11:25 2015 -0800

----------------------------------------------------------------------
 .../compliant/CompliantTextRecordReader.java    |  3 ++
 .../easy/text/compliant/FieldVarCharOutput.java | 27 ++++++++----
 .../drill/exec/store/text/TestCsvHeader.java    | 43 ++++++++++++++++++++
 .../resources/bootstrap-storage-plugins.json    |  7 ++++
 .../resources/store/text/data/cars.csvh-test    |  6 +++
 .../resources/store/text/data/d2/cars1.csvh     |  6 +++
 .../resources/store/text/data/d2/cars2.csvh     |  5 +++
 pom.xml                                         |  1 +
 8 files changed, 91 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index f0363da..f6dab89 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -146,6 +146,9 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
     assert (settings.isHeaderExtractionEnabled());
     assert (oContext != null);
 
+    // don't skip header in case skipFirstLine is set true
+    settings.setSkipFirstLine(false);
+
     // setup Output using OutputMutator
     // we should use a separate output mutator to avoid reshaping query output with header data
     HeaderOutputMutator hOutputMutator = new HeaderOutputMutator();

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
index f86d40f..494c593 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/FieldVarCharOutput.java
@@ -76,11 +76,11 @@ class FieldVarCharOutput extends TextOutput {
   public FieldVarCharOutput(OutputMutator outputMutator, String [] fieldNames, Collection<SchemaPath> columns, boolean isStarQuery) throws SchemaChangeException {
 
     int totalFields = fieldNames.length;
-    this.selectedFields = new boolean[totalFields];
-    this.vectors = new VarCharVector[totalFields];
+    List<String> outputColumns = new ArrayList<>(Arrays.asList(fieldNames));
 
     if (isStarQuery) {
       maxField = totalFields - 1;
+      this.selectedFields = new boolean[totalFields];
       Arrays.fill(selectedFields, true);
     } else {
       List<Integer> columnIds = new ArrayList<Integer>();
@@ -90,25 +90,38 @@ class FieldVarCharOutput extends TextOutput {
       for (SchemaPath path : columns) {
         pathStr = path.getRootSegment().getPath();
         if (pathStr.equals(COL_NAME) && path.getRootSegment().getChild() != null) {
-          //TODO: support both field names and columns index in predicate pushdown
-          index = path.getRootSegment().getChild().getArraySegment().getIndex();
+          //TODO: support both field names and columns index along with predicate pushdown
+          throw UserException
+              .unsupportedError()
+              .message("With extractHeader enabled, only header names are supported")
+              .addContext("column name", pathStr)
+              .addContext("column index", path.getRootSegment().getChild())
+              .build(logger);
         } else {
-          index = Arrays.asList(fieldNames).indexOf(pathStr);
+          index = outputColumns.indexOf(pathStr);
+          if (index < 0) {
+            // found col that is not a part of fieldNames, add it
+            // this col might be part of some another scanner
+            index = totalFields++;
+            outputColumns.add(pathStr);
+          }
         }
-        assert index >= 0 && index < totalFields : "Invalid column index encountered";
         columnIds.add(index);
       }
       Collections.sort(columnIds);
 
+      this.selectedFields = new boolean[totalFields];
       for(Integer i : columnIds) {
         selectedFields[i] = true;
         maxField = i;
       }
     }
 
+    this.vectors = new VarCharVector[totalFields];
+
     for (int i = 0; i <= maxField; i++) {
       if (selectedFields[i]) {
-        MaterializedField field = MaterializedField.create(fieldNames[i], Types.required(TypeProtos.MinorType.VARCHAR));
+        MaterializedField field = MaterializedField.create(outputColumns.get(i), Types.required(TypeProtos.MinorType.VARCHAR));
         this.vectors[i] = outputMutator.addField(field, VarCharVector.class);
       }
     }

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
index 5dcc9a8..92aa02b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
@@ -119,6 +119,49 @@ public class TestCsvHeader extends BaseTestQuery{
     validateResults (batches, expectedOutput);
   }
 
+  @Test //DRILL-4108
+  public void testCsvHeaderNonExistingColumn() throws Exception {
+    String query = String.format("select `Year`, Model, Category from dfs_test.`%s` where Make = 'Chevy'", root);
+    List<QueryDataBatch> batches = testSqlWithResults(query);
+
+    String expectedOutput = "Year|Model|Category\n" +
+        "1999|Venture \"Extended Edition\"|\n" +
+        "1999|Venture \"Extended Edition, Very Large\"|\n";
+
+    validateResults (batches, expectedOutput);
+  }
+
+  @Test //DRILL-4108
+  public void testCsvHeaderMismatch() throws Exception {
+    String ddir = FileUtils.getResourceAsFile("/store/text/data/d2").toURI().toString();
+    String query = String.format("select `Year`, Model, Category from dfs_test.`%s` where Make = 'Chevy'", ddir);
+    List<QueryDataBatch> batches = testSqlWithResults(query);
+    // double header is unique to this test framework, doesn't happen with sqlline
+    String expectedOutput = "Year|Model|Category\n" +
+        "1999|Venture \"Extended Edition\"|\n" +
+        "1999|Venture \"Extended Edition, Very Large\"|\n" +
+        "Year|Model|Category\n" +
+        "1999||Venture \"Extended Edition\"\n" +
+        "1999||Venture \"Extended Edition, Very Large\"\n";
+
+    validateResults (batches, expectedOutput);
+  }
+
+  @Test //DRILL-4108
+  public void testCsvHeaderSkipFirstLine() throws Exception {
+    // test that header is not skipped when skipFirstLine is true
+    // testing by defining new format plugin with skipFirstLine set to true and diff file extension
+    String dfile = FileUtils.getResourceAsFile("/store/text/data/cars.csvh-test").toURI().toString();
+    String query = String.format("select `Year`, Model from dfs_test.`%s` where Make = 'Chevy'", dfile);
+    List<QueryDataBatch> batches = testSqlWithResults(query);
+
+    String expectedOutput = "Year|Model\n" +
+        "1999|Venture \"Extended Edition\"\n" +
+        "1999|Venture \"Extended Edition, Very Large\"\n";
+
+    validateResults (batches, expectedOutput);
+  }
+
   private void validateResults (List<QueryDataBatch> batches, String expectedOutput) throws SchemaChangeException {
     String actualOutput = getResultString(batches, OUTPUT_DELIMITER);
     //for your and machine's eyes

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
index 76f3116..ec840a6 100644
--- a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
+++ b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
@@ -57,6 +57,13 @@
           extensions: [ "csvh" ],
           delimiter: ",",
           extractHeader: true
+        },
+        "csvh-test" : {
+          type: "text",
+          extensions: [ "csvh-test" ],
+          delimiter: ",",
+          extractHeader: true,
+          skipFirstLine: true
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test b/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test
new file mode 100644
index 0000000..37b82a0
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/data/cars.csvh-test
@@ -0,0 +1,6 @@
+Year,Make,Model,Description,Price
+1997,Ford,E350,"ac, abs, moon",3000.00
+1999,Chevy,"Venture ""Extended Edition""","",4900.00
+1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
+1996,Jeep,Grand Cherokee,"MUST SELL!
+air, moon roof, loaded",4799.00
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh b/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh
new file mode 100644
index 0000000..37b82a0
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/data/d2/cars1.csvh
@@ -0,0 +1,6 @@
+Year,Make,Model,Description,Price
+1997,Ford,E350,"ac, abs, moon",3000.00
+1999,Chevy,"Venture ""Extended Edition""","",4900.00
+1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
+1996,Jeep,Grand Cherokee,"MUST SELL!
+air, moon roof, loaded",4799.00
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh b/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh
new file mode 100644
index 0000000..4d6d947
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/data/d2/cars2.csvh
@@ -0,0 +1,5 @@
+Year,Make,Category,Description,Price
+1997,Ford,E350,"ac, abs, moon",3000.00
+1999,Chevy,"Venture ""Extended Edition""","",4900.00
+1999,Chevy,"Venture ""Extended Edition, Very Large""",,5000.00
+1996,Jeep,Grand Cherokee,"MUST SELL! air, moon roof, loaded",4799.00
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/36bd6378/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index b2f2507..9876351 100644
--- a/pom.xml
+++ b/pom.xml
@@ -183,6 +183,7 @@
             <exclude>**/git.properties</exclude>
             <exclude>**/*.csv</exclude>
             <exclude>**/*.csvh</exclude>
+            <exclude>**/*.csvh-test</exclude>
             <exclude>**/*.tsv</exclude>
             <exclude>**/*.txt</exclude>
             <exclude>**/*.ssv</exclude>