You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@drill.apache.org by pa...@apache.org on 2017/01/14 01:47:44 UTC

[1/4] drill git commit: DRILL-4996: Parquet Date auto-correction is not working in auto-partitioned parquet files generated by drill-1.6

Repository: drill
Updated Branches:
  refs/heads/master ee399317a -> 34969583b


DRILL-4996: Parquet Date auto-correction is not working in auto-partitioned parquet files generated by drill-1.6

- Changed the approach for detecting corrupted date values in parquet files generated by Drill:
  the corruption status is now determined by looking at the min/max values in the metadata;
- Refactored TestCorruptParquetDateCorrection accordingly.

This closes #687


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/eef3b3fb
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/eef3b3fb
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/eef3b3fb

Branch: refs/heads/master
Commit: eef3b3fb6f4e76e95510253d155d0659e387fc99
Parents: ee39931
Author: Vitalii Diravka <vi...@gmail.com>
Authored: Mon Dec 12 04:41:49 2016 +0000
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:44:29 2017 -0800

----------------------------------------------------------------------
 .../store/parquet/ParquetReaderUtility.java     |  18 +-
 .../TestCorruptParquetDateCorrection.java       | 284 +++++++++----------
 ...t_dates_and_old_drill_parquet_writer.parquet | Bin 0 -> 4241 bytes
 3 files changed, 140 insertions(+), 162 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
index b22e666..a94e220 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
@@ -195,26 +195,26 @@ public class ParquetReaderUtility {
 
     String createdBy = footer.getFileMetaData().getCreatedBy();
     String drillVersion = footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.DRILL_VERSION_PROPERTY);
-    String stringWriterVersion = footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.WRITER_VERSION_PROPERTY);
+    String writerVersionValue = footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.WRITER_VERSION_PROPERTY);
     // This flag can be present in parquet files which were generated with 1.9.0-SNAPSHOT and 1.9.0 drill versions.
     // If this flag is present it means that the version of the drill parquet writer is 2
     final String isDateCorrectFlag = "is.date.correct";
     String isDateCorrect = footer.getFileMetaData().getKeyValueMetaData().get(isDateCorrectFlag);
     if (drillVersion != null) {
       int writerVersion = 1;
-      if (stringWriterVersion != null) {
-        writerVersion = Integer.parseInt(stringWriterVersion);
+      if (writerVersionValue != null) {
+        writerVersion = Integer.parseInt(writerVersionValue);
       }
       else if (Boolean.valueOf(isDateCorrect)) {
         writerVersion = DRILL_WRITER_VERSION_STD_DATE_FORMAT;
       }
       return writerVersion >= DRILL_WRITER_VERSION_STD_DATE_FORMAT ? DateCorruptionStatus.META_SHOWS_NO_CORRUPTION
-          : DateCorruptionStatus.META_SHOWS_CORRUPTION;
+          // loop through parquet column metadata to find date columns, check for corrupt values
+          : checkForCorruptDateValuesInStatistics(footer, columns, autoCorrectCorruptDates);
     } else {
       // Possibly an old, un-migrated Drill file, check the column statistics to see if min/max values look corrupt
       // only applies if there is a date column selected
       if (createdBy == null || createdBy.equals("parquet-mr")) {
-        // loop through parquet column metadata to find date columns, check for corrupt values
         return checkForCorruptDateValuesInStatistics(footer, columns, autoCorrectCorruptDates);
       } else {
         // check the created by to see if it is a migrated Drill file
@@ -226,7 +226,7 @@ public class ParquetReaderUtility {
             SemanticVersion semVer = parsedCreatedByVersion.getSemanticVersion();
             String pre = semVer.pre + "";
             if (semVer.major == 1 && semVer.minor == 8 && semVer.patch == 1 && pre.contains("drill")) {
-              return DateCorruptionStatus.META_SHOWS_CORRUPTION;
+              return checkForCorruptDateValuesInStatistics(footer, columns, autoCorrectCorruptDates);
             }
           }
           // written by a tool that wasn't Drill, the dates are not corrupted
@@ -244,9 +244,9 @@ public class ParquetReaderUtility {
    * Detect corrupt date values by looking at the min/max values in the metadata.
    *
    * This should only be used when a file does not have enough metadata to determine if
-   * the data was written with an older version of Drill, or an external tool. Drill
-   * versions 1.3 and beyond should have enough metadata to confirm that the data was written
-   * by Drill.
+   * the data was written with an external tool or an older version of Drill
+   * ({@link org.apache.drill.exec.store.parquet.ParquetRecordWriter#WRITER_VERSION_PROPERTY} <
+   * {@link org.apache.drill.exec.store.parquet.ParquetReaderUtility#DRILL_WRITER_VERSION_STD_DATE_FORMAT})
    *
    * This method only checks the first Row Group, because Drill has only ever written
    * a single Row Group per file.

http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
index 0ab247d..8cd1a85 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
@@ -17,6 +17,8 @@
  */
 package org.apache.drill.exec.physical.impl.writer;
 
+import static java.lang.String.format;
+
 import org.apache.drill.PlanTestBase;
 import org.apache.drill.TestBuilder;
 import org.apache.drill.common.util.TestTools;
@@ -37,10 +39,11 @@ import java.util.regex.Pattern;
  * Tests for compatibility reading old parquet files after date corruption
  * issue was fixed in DRILL-4203.
  *
- * Drill was writing non-standard dates into parquet files for all releases
- * before 1.9.0. The values have been read by Drill correctly by Drill, but
- * external tools like Spark reading the files will see corrupted values for
- * all dates that have been written by Drill.
+ * Drill could write non-standard dates into parquet files. This issue is related to
+ * all drill releases where {@link org.apache.drill.exec.store.parquet.ParquetRecordWriter#WRITER_VERSION_PROPERTY} <
+ * {@link org.apache.drill.exec.store.parquet.ParquetReaderUtility#DRILL_WRITER_VERSION_STD_DATE_FORMAT}
+ * The values have been read correctly by Drill, but external tools like Spark reading the files will see
+ * corrupted values for all dates that have been written by Drill.
  *
  * This change corrects the behavior of the Drill parquet writer to correctly
  * store dates in the format given in the parquet specification.
@@ -59,8 +62,7 @@ import java.util.regex.Pattern;
  * While the old behavior was a consistent shift into an unlikely range
  * to be used in a modern database (over 10,000 years in the future), these are still
  * valid date values. In the case where these may have been written into
- * files intentionally, and we cannot be certain from the metadata if Drill
- * produced the files, an option is included to turn off the auto-correction.
+ * files intentionally, an option is included to turn off the auto-correction.
  * Use of this option is assumed to be extremely unlikely, but it is included
  * for completeness.
  */
@@ -76,11 +78,8 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   //    - one from the 0.6 version of Drill, before files had min/max statistics
   //        - detecting corrupt values must be deferred to actual data page reading
   //    - one from 1.4, where there is a proper created-by, but the corruption is present
-  private static final String MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH =
+  private static final String MIXED_CORRUPTED_AND_CORRECT_DATES_PATH =
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/mixed_drill_versions";
-  // partitioned with 1.4.0, date values are known to be corrupt
-  private static final String CORRUPTED_PARTITIONED_DATES_1_4_0_PATH =
-      "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203";
   // partitioned with 1.2.0, no certain metadata that these were written with Drill
   // the value will be checked to see that they look corrupt and they will be corrected
   // by default. Users can use the format plugin option autoCorrectCorruptDates to disable
@@ -88,9 +87,13 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   // in the similar range as Drill's corrupt values
   private static final String CORRUPTED_PARTITIONED_DATES_1_2_PATH =
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203_1_2";
+  // partitioned with 1.4.0, no certain metadata regarding the date corruption status.
+  // The same detection approach of the corrupt date values as for the files partitioned with 1.2.0
+  private static final String CORRUPTED_PARTITIONED_DATES_1_4_0_PATH =
+      "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203";
   private static final String PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS =
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/null_date_cols_with_corruption_4203.parquet";
-  private static final String CORRECTED_PARTITIONED_DATES_1_9_PATH =
+  private static final String CORRECT_PARTITIONED_DATES_1_9_PATH =
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/1_9_0_partitioned_no_corruption";
   private static final String VARCHAR_PARTITIONED =
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/fewtypes_varcharpartition";
@@ -98,11 +101,13 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/fewtypes_datepartition";
   private static final String EXCEPTION_WHILE_PARSING_CREATED_BY_META =
       "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/hive1dot2_fewtypes_null";
+  private static final String CORRECT_DATES_1_6_0_PATH =
+      "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet";
+  private static final String PARTITIONED_1_2_FOLDER = "partitioned_with_corruption_4203_1_2";
+  private static final String MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER = "mixed_partitioned";
 
   private static FileSystem fs;
   private static Path path;
-  static String PARTITIONED_1_2_FOLDER = "partitioned_with_corruption_4203_1_2";
-  static String MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER = "mixed_partitioned";
 
   @BeforeClass
   public static void initFs() throws Exception {
@@ -116,9 +121,9 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
     copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH);
     copyMetaDataCacheToTempReplacingInternalPaths("parquet/4203_corrupt_dates/drill.parquet.metadata_1_2.requires_replace.txt",
         PARTITIONED_1_2_FOLDER);
-    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
-    copyDirectoryIntoTempSpace(CORRECTED_PARTITIONED_DATES_1_9_PATH, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
-    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_4_0_PATH, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
+    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
+    copyDirectoryIntoTempSpace(CORRECT_PARTITIONED_DATES_1_9_PATH, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
+    copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_4_0_PATH, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
   }
 
   /**
@@ -128,20 +133,20 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
    * in the case where we are certain correction is NOT needed. For more info see DRILL-4203.
    */
   @Test
-  public void testReadPartitionedOnCorrectedDates() throws Exception {
+  public void testReadPartitionedOnCorrectDates() throws Exception {
     try {
       for (String selection : new String[]{"*", "date_col"}) {
         // for sanity, try reading all partitions without a filter
         TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from table(dfs.`" + CORRECTED_PARTITIONED_DATES_1_9_PATH + "`" +
-                "(type => 'parquet', autoCorrectCorruptDates => false))")
+            .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+                selection, CORRECT_PARTITIONED_DATES_1_9_PATH)
             .unOrdered()
             .baselineColumns("date_col");
-        addDateBaselineVals(builder);
+        addDateBaselineValues(builder);
         builder.go();
 
-        String query = "select " + selection + " from table(dfs.`" + CORRECTED_PARTITIONED_DATES_1_9_PATH + "` " +
-            "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = date '1970-01-01'";
+        String query = format("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))" +
+            " where date_col = date '1970-01-01'", selection, CORRECT_PARTITIONED_DATES_1_9_PATH);
         // verify that pruning is actually taking place
         testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
 
@@ -161,9 +166,7 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   @Test
   public void testVarcharPartitionedReadWithCorruption() throws Exception {
     testBuilder()
-        .sqlQuery("select date_col from " +
-            "dfs.`" + VARCHAR_PARTITIONED + "`" +
-            "where length(varchar_col) = 12")
+        .sqlQuery("select date_col from dfs.`%s` where length(varchar_col) = 12", VARCHAR_PARTITIONED)
         .baselineColumns("date_col")
         .unOrdered()
         .baselineValues(new DateTime(2039, 4, 9, 0, 0))
@@ -174,24 +177,21 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   @Test
   public void testDatePartitionedReadWithCorruption() throws Exception {
     testBuilder()
-        .sqlQuery("select date_col from " +
-            "dfs.`" + DATE_PARTITIONED + "`" +
-            "where date_col = '1999-04-08'")
+        .sqlQuery("select date_col from dfs.`%s` where date_col = '1999-04-08'", DATE_PARTITIONED)
         .baselineColumns("date_col")
         .unOrdered()
         .baselineValues(new DateTime(1999, 4, 8, 0, 0))
         .go();
 
-    String sql = "select date_col from dfs.`" + DATE_PARTITIONED + "` where date_col > '1999-04-08'";
-    testPlanMatchingPatterns(sql, new String[]{"numFiles=6"}, null);
+    String query = format("select date_col from dfs.`%s` where date_col > '1999-04-08'", DATE_PARTITIONED);
+    testPlanMatchingPatterns(query, new String[]{"numFiles=6"}, null);
   }
 
   @Test
   public void testCorrectDatesAndExceptionWhileParsingCreatedBy() throws Exception {
     testBuilder()
-        .sqlQuery("select date_col from " +
-            "dfs.`" + EXCEPTION_WHILE_PARSING_CREATED_BY_META +
-            "` where to_date(date_col, 'yyyy-mm-dd') < '1997-01-02'")
+        .sqlQuery("select date_col from dfs.`%s` where to_date(date_col, 'yyyy-mm-dd') < '1997-01-02'",
+            EXCEPTION_WHILE_PARSING_CREATED_BY_META)
         .baselineColumns("date_col")
         .unOrdered()
         .baselineValues(new DateTime(1996, 1, 29, 0, 0))
@@ -201,68 +201,34 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
         .go();
   }
 
-  /**
-   * Test reading a directory full of partitioned parquet files with dates, these files have a drill version
-   * number of 1.4.0 in their footers, so we can be certain they are corrupt. The option to disable the
-   * correction is passed, but it will not change the result in the case where we are certain correction
-   * is needed. For more info see DRILL-4203.
-   */
-  @Test
-  public void testReadPartitionedOnCorruptedDates() throws Exception {
-    try {
-      for (String selection : new String[]{"*", "date_col"}) {
-        // for sanity, try reading all partitions without a filter
-        TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_4_0_PATH + "`" +
-                "(type => 'parquet', autoCorrectCorruptDates => false))")
-            .unOrdered()
-            .baselineColumns("date_col");
-        addDateBaselineVals(builder);
-        builder.go();
-
-        String query = "select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_4_0_PATH + "` " +
-            "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = date '1970-01-01'";
-        // verify that pruning is actually taking place
-        testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
-        // read with a filter on the partition column
-        testBuilder()
-            .sqlQuery(query)
-            .unOrdered()
-            .baselineColumns("date_col")
-            .baselineValues(new DateTime(1970, 1, 1, 0, 0))
-            .go();
-      }
-    } finally {
-      test("alter session reset all");
-    }
-  }
 
   @Test
   public void testReadPartitionedOnCorruptedDates_UserDisabledCorrection() throws Exception {
     try {
       for (String selection : new String[]{"*", "date_col"}) {
-        // for sanity, try reading all partitions without a filter
-        TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`" +
-                "(type => 'parquet', autoCorrectCorruptDates => false))")
-            .unOrdered()
-            .baselineColumns("date_col");
-        addCorruptedDateBaselineVals(builder);
-        builder.go();
-
-        String query = "select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "` " +
-            "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = cast('15334-03-17' as date)";
-        // verify that pruning is actually taking place
-        testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
-        // read with a filter on the partition column
-        testBuilder()
-            .sqlQuery(query)
-            .unOrdered()
-            .baselineColumns("date_col")
-            .baselineValues(new DateTime(15334, 03, 17, 0, 0))
-            .go();
+        for (String table : new String[]{CORRUPTED_PARTITIONED_DATES_1_2_PATH, CORRUPTED_PARTITIONED_DATES_1_4_0_PATH}) {
+          // for sanity, try reading all partitions without a filter
+          TestBuilder builder = testBuilder()
+              .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+                  selection, table)
+              .unOrdered()
+              .baselineColumns("date_col");
+          addCorruptedDateBaselineValues(builder);
+          builder.go();
+
+          String query = format("select %s from table(dfs.`%s` (type => 'parquet', " +
+              "autoCorrectCorruptDates => false)) where date_col = cast('15334-03-17' as date)", selection, table);
+          // verify that pruning is actually taking place
+          testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
+
+          // read with a filter on the partition column
+          testBuilder()
+              .sqlQuery(query)
+              .unOrdered()
+              .baselineColumns("date_col")
+              .baselineValues(new DateTime(15334, 3, 17, 0, 0))
+              .go();
+        }
       }
     } finally {
       test("alter session reset all");
@@ -270,29 +236,31 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   }
 
   @Test
-  public void testCorruptValDetectionDuringPruning() throws Exception {
+  public void testCorruptValueDetectionDuringPruning() throws Exception {
     try {
       for (String selection : new String[]{"*", "date_col"}) {
-        // for sanity, try reading all partitions without a filter
-        TestBuilder builder = testBuilder()
-            .sqlQuery("select " + selection + " from dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`")
-            .unOrdered()
-            .baselineColumns("date_col");
-        addDateBaselineVals(builder);
-        builder.go();
-
-        String query = "select " + selection + " from dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`" +
-            " where date_col = date '1970-01-01'";
-        // verify that pruning is actually taking place
-        testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
-        // read with a filter on the partition column
-        testBuilder()
-            .sqlQuery(query)
-            .unOrdered()
-            .baselineColumns("date_col")
-            .baselineValues(new DateTime(1970, 1, 1, 0, 0))
-            .go();
+        for (String table : new String[]{CORRUPTED_PARTITIONED_DATES_1_2_PATH, CORRUPTED_PARTITIONED_DATES_1_4_0_PATH}) {
+          // for sanity, try reading all partitions without a filter
+          TestBuilder builder = testBuilder()
+              .sqlQuery("select %s from dfs.`%s`", selection, table)
+              .unOrdered()
+              .baselineColumns("date_col");
+          addDateBaselineValues(builder);
+          builder.go();
+
+          String query = format("select %s from dfs.`%s`" +
+              " where date_col = date '1970-01-01'", selection, table);
+          // verify that pruning is actually taking place
+          testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
+
+          // read with a filter on the partition column
+          testBuilder()
+              .sqlQuery(query)
+              .unOrdered()
+              .baselineColumns("date_col")
+              .baselineValues(new DateTime(1970, 1, 1, 0, 0))
+              .go();
+        }
       }
     } finally {
       test("alter session reset all");
@@ -313,8 +281,8 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   @Test
   public void testReadCorruptDatesWithNullFilledColumns() throws Exception {
     testBuilder()
-        .sqlQuery("select null_dates_1, null_dates_2, non_existent_field, date_col from dfs.`" +
-            PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS + "`")
+        .sqlQuery("select null_dates_1, null_dates_2, non_existent_field, date_col from dfs.`%s`",
+            PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS)
         .unOrdered()
         .baselineColumns("null_dates_1", "null_dates_2", "non_existent_field", "date_col")
         .baselineValues(null, null, null, new DateTime(1970, 1, 1, 0, 0))
@@ -332,7 +300,7 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
     readFilesWithUserDisabledAutoCorrection();
 
     try {
-      test(String.format("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER));
+      test("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER);
       // read all of the types with the complex reader
       readFilesWithUserDisabledAutoCorrection();
     } finally {
@@ -352,34 +320,34 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   @Test
   public void testReadMixedOldAndNewBothReaders() throws Exception {
     /// read once with the flat reader
-    readMixedCorruptedAndCorrectedDates();
+    readMixedCorruptedAndCorrectDates();
 
     try {
       // read all of the types with the complex reader
-      test(String.format("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER));
-      readMixedCorruptedAndCorrectedDates();
+      test("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER);
+      readMixedCorruptedAndCorrectDates();
     } finally {
-      test(String.format("alter session set %s = false", ExecConstants.PARQUET_NEW_RECORD_READER));
+      test("alter session set %s = false", ExecConstants.PARQUET_NEW_RECORD_READER);
     }
   }
 
   @Test
   public void testReadOldMetadataCacheFile() throws Exception {
     // for sanity, try reading all partitions without a filter
-    String query = "select date_col from dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "`";
+    String query = format("select date_col from dfs.`%s`", new Path(path, PARTITIONED_1_2_FOLDER));
     TestBuilder builder = testBuilder()
         .sqlQuery(query)
         .unOrdered()
         .baselineColumns("date_col");
-    addDateBaselineVals(builder);
+    addDateBaselineValues(builder);
     builder.go();
     testPlanMatchingPatterns(query, new String[]{"usedMetadataFile=true"}, null);
   }
 
   @Test
   public void testReadOldMetadataCacheFileWithPruning() throws Exception {
-    String query = "select date_col from dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "`" +
-        " where date_col = date '1970-01-01'";
+    String query = format("select date_col from dfs.`%s` where date_col = date '1970-01-01'",
+        new Path(path, PARTITIONED_1_2_FOLDER));
     // verify that pruning is actually taking place
     testPlanMatchingPatterns(query, new String[]{"numFiles=1", "usedMetadataFile=true"}, null);
 
@@ -396,15 +364,16 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   public void testReadOldMetadataCacheFileOverrideCorrection() throws Exception {
     // for sanity, try reading all partitions without a filter
     TestBuilder builder = testBuilder()
-        .sqlQuery("select date_col from table(dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "`" +
-            "(type => 'parquet', autoCorrectCorruptDates => false))")
+        .sqlQuery("select date_col from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+            new Path(path, PARTITIONED_1_2_FOLDER))
         .unOrdered()
         .baselineColumns("date_col");
-    addCorruptedDateBaselineVals(builder);
+    addCorruptedDateBaselineValues(builder);
     builder.go();
 
-    String query = "select date_col from table(dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "` " +
-        "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = cast('15334-03-17' as date)";
+    String query = format("select date_col from table(dfs.`%s` (type => 'parquet', " +
+        "autoCorrectCorruptDates => false)) where date_col = cast('15334-03-17' as date)",
+        new Path(path, PARTITIONED_1_2_FOLDER));
     // verify that pruning is actually taking place
     testPlanMatchingPatterns(query, new String[]{"numFiles=1", "usedMetadataFile=true"}, null);
 
@@ -413,27 +382,26 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
         .sqlQuery(query)
         .unOrdered()
         .baselineColumns("date_col")
-        .baselineValues(new DateTime(15334, 03, 17, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 17, 0, 0))
         .go();
   }
 
   @Test
   public void testReadNewMetadataCacheFileOverOldAndNewFiles() throws Exception {
-    String table = "dfs.`" + new Path(path, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER) + "`";
+    String table = format("dfs.`%s`", new Path(path, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER));
     copyMetaDataCacheToTempReplacingInternalPaths("parquet/4203_corrupt_dates/" +
-        "mixed_version_partitioned_metadata.requires_replace.txt", MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
+        "mixed_version_partitioned_metadata.requires_replace.txt", MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
     // for sanity, try reading all partitions without a filter
     TestBuilder builder = testBuilder()
         .sqlQuery("select date_col from " + table)
         .unOrdered()
         .baselineColumns("date_col");
-    addDateBaselineVals(builder);
-    addDateBaselineVals(builder);
-    addDateBaselineVals(builder);
+    addDateBaselineValues(builder);
+    addDateBaselineValues(builder);
+    addDateBaselineValues(builder);
     builder.go();
 
-    String query = "select date_col from " + table +
-        " where date_col = date '1970-01-01'";
+    String query = format("select date_col from %s where date_col = date '1970-01-01'", table);
     // verify that pruning is actually taking place
     testPlanMatchingPatterns(query, new String[]{"numFiles=3", "usedMetadataFile=true"}, null);
 
@@ -448,28 +416,38 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
         .go();
   }
 
+  @Test
+  public void testCorrectDateValuesGeneratedByOldVersionOfDrill() throws Exception {
+    testBuilder()
+        .sqlQuery("select i_rec_end_date from dfs.`%s` limit 1", CORRECT_DATES_1_6_0_PATH)
+        .baselineColumns("i_rec_end_date")
+        .unOrdered()
+        .baselineValues(new DateTime(2000, 10, 26, 0, 0))
+        .go();
+  }
+
   /**
    * Read a directory with parquet files where some have corrupted dates, see DRILL-4203.
    * @throws Exception
    */
-  private void readMixedCorruptedAndCorrectedDates() throws Exception {
+  private void readMixedCorruptedAndCorrectDates() throws Exception {
     // ensure that selecting the date column explicitly or as part of a star still results
     // in checking the file metadata for date columns (when we need to check the statistics
     // for bad values) to set the flag that the values are corrupt
     for (String selection : new String[] {"*", "date_col"}) {
       TestBuilder builder = testBuilder()
-          .sqlQuery("select " + selection + " from dfs.`" + MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH + "`")
+          .sqlQuery("select %s from dfs.`%s`", selection, MIXED_CORRUPTED_AND_CORRECT_DATES_PATH)
           .unOrdered()
           .baselineColumns("date_col");
       for (int i = 0; i < 4; i++) {
-        addDateBaselineVals(builder);
+        addDateBaselineValues(builder);
       }
       builder.go();
     }
   }
 
 
-  private void addDateBaselineVals(TestBuilder builder) {
+  private void addDateBaselineValues(TestBuilder builder) {
     builder
         .baselineValues(new DateTime(1970, 1, 1, 0, 0))
         .baselineValues(new DateTime(1970, 1, 2, 0, 0))
@@ -480,16 +458,16 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
   }
 
   /**
-   * These are the same values added in the addDateBaselineVals, shifted as corrupt values
+   * These are the same values added in the addDateBaselineValues, shifted as corrupt values
    */
-  private void addCorruptedDateBaselineVals(TestBuilder builder) {
+  private void addCorruptedDateBaselineValues(TestBuilder builder) {
     builder
-        .baselineValues(new DateTime(15334, 03, 17, 0, 0))
-        .baselineValues(new DateTime(15334, 03, 18, 0, 0))
-        .baselineValues(new DateTime(15334, 03, 15, 0, 0))
-        .baselineValues(new DateTime(15334, 03, 16, 0, 0))
-        .baselineValues(new DateTime(15264, 03, 16, 0, 0))
-        .baselineValues(new DateTime(15379, 03, 17, 0, 0));
+        .baselineValues(new DateTime(15334, 3, 17, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 18, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 15, 0, 0))
+        .baselineValues(new DateTime(15334, 3, 16, 0, 0))
+        .baselineValues(new DateTime(15264, 3, 16, 0, 0))
+        .baselineValues(new DateTime(15379, 3, 17, 0, 0));
   }
 
   private void readFilesWithUserDisabledAutoCorrection() throws Exception {
@@ -498,14 +476,14 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
     // for bad values) to set the flag that the values are corrupt
     for (String selection : new String[] {"*", "date_col"}) {
       TestBuilder builder = testBuilder()
-          .sqlQuery("select " + selection + " from table(dfs.`" + MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH + "`" +
-              "(type => 'parquet', autoCorrectCorruptDates => false))")
+          .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+              selection, MIXED_CORRUPTED_AND_CORRECT_DATES_PATH)
           .unOrdered()
           .baselineColumns("date_col");
-      addDateBaselineVals(builder);
-      addDateBaselineVals(builder);
-      addCorruptedDateBaselineVals(builder);
-      addCorruptedDateBaselineVals(builder);
+      addDateBaselineValues(builder);
+      addCorruptedDateBaselineValues(builder);
+      addCorruptedDateBaselineValues(builder);
+      addCorruptedDateBaselineValues(builder);
       builder.go();
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet b/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
new file mode 100644
index 0000000..6d81db0
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet differ

[3/4] drill git commit: DRILL-5152: Enhance the mock data source: better data, SQL access

Posted by pa...@apache.org.

DRILL-5152: Enhance the mock data source: better data, SQL access

Provides an enhanced version of the mock data source. See the JIRA
entry for motivation, package-info.java for details of operation.

Revisions suggested by code review

Also includes additional comments and a few more compiler warning
cleanups.

This closes #708


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/535623bb
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/535623bb
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/535623bb

Branch: refs/heads/master
Commit: 535623bb8fdf81220164134d366817f57e560710
Parents: 77e5010
Author: Paul Rogers <pr...@maprtech.com>
Authored: Wed Dec 21 21:47:20 2016 -0800
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:45:46 2017 -0800

----------------------------------------------------------------------
 .../drill/exec/record/AbstractRecordBatch.java  |   2 +-
 .../drill/exec/store/AbstractRecordReader.java  |   2 +-
 .../apache/drill/exec/store/StoragePlugin.java  |   5 +-
 .../drill/exec/store/StoragePluginRegistry.java |   4 +-
 .../exec/store/StoragePluginRegistryImpl.java   |  29 ++-
 .../apache/drill/exec/store/mock/ColumnDef.java | 178 ++++++++++++++++
 .../apache/drill/exec/store/mock/DateGen.java   |  69 +++++++
 .../apache/drill/exec/store/mock/DoubleGen.java |  48 +++++
 .../store/mock/ExtendedMockRecordReader.java    | 156 ++++++++++++++
 .../apache/drill/exec/store/mock/FieldGen.java  |  37 ++++
 .../apache/drill/exec/store/mock/IntGen.java    |  47 +++++
 .../drill/exec/store/mock/MockGroupScanPOP.java | 205 ++++++++++++++++---
 .../drill/exec/store/mock/MockRecordReader.java |   8 +-
 .../exec/store/mock/MockScanBatchCreator.java   |   8 +-
 .../exec/store/mock/MockStorageEngine.java      |  79 ++++++-
 .../store/mock/MockStorageEngineConfig.java     |   9 +-
 .../drill/exec/store/mock/MockStorePOP.java     |   3 +-
 .../drill/exec/store/mock/MockSubScanPOP.java   |  44 +++-
 .../apache/drill/exec/store/mock/MoneyGen.java  |  48 +++++
 .../apache/drill/exec/store/mock/StringGen.java |  56 +++++
 .../drill/exec/store/mock/package-info.java     | 130 ++++++++++++
 .../apache/drill/exec/TestOpSerialization.java  |   4 +-
 .../fn/interp/ExpressionInterpreterTest.java    |  18 +-
 23 files changed, 1108 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
index 998665c..d82c154 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
index 16118d9..2152025 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
@@ -67,7 +67,7 @@ public abstract class AbstractRecordReader implements RecordReader {
     Collection<SchemaPath> columnsToRead = projected;
 
     // If no column is required (SkipQuery), by default it will use DEFAULT_COLS_TO_READ .
-    // Handling SkipQuery is storage-plugin specif : JSON, text reader, parquet will override, in order to
+    // Handling SkipQuery is storage-plugin specific : JSON, text reader, parquet will override, in order to
     // improve query performance.
     if (projected.isEmpty()) {
       columnsToRead = getDefaultColumnsToRead();

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
index 112bc15..2969d4f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -71,7 +71,7 @@ public interface StoragePlugin extends SchemaFactory, AutoCloseable {
   public AbstractGroupScan getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns)
       throws IOException;
 
-  /** Method returns a jackson serializable object that extends a StoragePluginConfig
+  /** Method returns a Jackson serializable object that extends a StoragePluginConfig
   * @return an extension of StoragePluginConfig
   */
   public StoragePluginConfig getConfig();
@@ -80,5 +80,4 @@ public interface StoragePlugin extends SchemaFactory, AutoCloseable {
    * Initialize the storage plugin. The storage plugin will not be used until this method is called.
    */
   public void start() throws IOException;
-
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
index 7018ce8..82f18f8 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -51,7 +51,7 @@ public interface StoragePluginRegistry extends Iterable<Map.Entry<String, Storag
    * @param name
    *          The name of the plugin
    * @param config
-   *          The plugin confgiruation
+   *          The plugin configuration
    * @param persist
    *          Whether to persist the plugin for later use or treat it as ephemeral.
    * @return The StoragePlugin instance.

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
index bf4affd..3fb1c3a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -125,9 +125,10 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
     availablePlugins = findAvailablePlugins(classpathScan);
 
     // create registered plugins defined in "storage-plugins.json"
-    this.plugins.putAll(createPlugins());
+    plugins.putAll(createPlugins());
   }
 
+  @SuppressWarnings("resource")
   private Map<String, StoragePlugin> createPlugins() throws DrillbitStartupException {
     try {
       /*
@@ -145,7 +146,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
             String pluginsData = Resources.toString(url, Charsets.UTF_8);
             StoragePlugins plugins = lpPersistence.getMapper().readValue(pluginsData, StoragePlugins.class);
             for (Map.Entry<String, StoragePluginConfig> config : plugins) {
-              if (!pluginSystemTable.putIfAbsent(config.getKey(), config.getValue())) {
+              if (!definePluginConfig(config.getKey(), config.getValue())) {
                 logger.warn("Duplicate plugin instance '{}' defined in [{}, {}], ignoring the later one.",
                     config.getKey(), pluginURLMap.get(config.getKey()), url);
                 continue;
@@ -185,6 +186,24 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
     }
   }
 
+  /**
+   * Add a plugin and configuration. Assumes neither exists. Primarily
+   * for testing.
+   * <p>
+   * The plugin instance is registered first through {@link #addPlugin};
+   * the configuration is then handed to the plugin system table's
+   * {@code putIfAbsent}. The boolean returned for the configuration is
+   * ignored here, so this method does not report whether the
+   * configuration was actually stored.
+   *
+   * @param name plugin name
+   * @param config plugin config
+   * @param plugin plugin implementation
+   */
+
+  public void definePlugin(String name, StoragePluginConfig config, StoragePlugin plugin) {
+    addPlugin(name, plugin);
+    definePluginConfig(name, config);
+  }
+
+  private boolean definePluginConfig(String name, StoragePluginConfig config) {
+    return pluginSystemTable.putIfAbsent(name, config); // createPlugins() logs a "duplicate" warning when this is false
+  }
+
   @Override
   public void addPlugin(String name, StoragePlugin plugin) {
     plugins.put(name, plugin);
@@ -192,6 +211,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
 
   @Override
   public void deletePlugin(String name) {
+    @SuppressWarnings("resource")
     StoragePlugin plugin = plugins.remove(name);
     closePlugin(plugin);
     pluginSystemTable.delete(name);
@@ -209,6 +229,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
     }
   }
 
+  @SuppressWarnings("resource")
   @Override
   public StoragePlugin createOrUpdate(String name, StoragePluginConfig config, boolean persist)
       throws ExecutionSetupException {
@@ -299,6 +320,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
     }
   }
 
+  @SuppressWarnings("resource")
   @Override
   public FormatPlugin getFormatPlugin(StoragePluginConfig storageConfig, FormatPluginConfig formatConfig)
       throws ExecutionSetupException {
@@ -346,6 +368,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
 
   public class DrillSchemaFactory implements SchemaFactory {
 
+    @SuppressWarnings("resource")
     @Override
     public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
       Stopwatch watch = Stopwatch.createStarted();

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
new file mode 100644
index 0000000..cfaacdd
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn;
+
+/**
+ * Defines a column for the "enhanced" version of the mock data
+ * source. This class is built from the column definitions in either
+ * the physical plan or an SQL statement (which gives rise to a
+ * physical plan.)
+ */
+
+public class ColumnDef {
+  public MockColumn mockCol;
+  public String name;
+  public int width;
+  public FieldGen generator;
+
+  public ColumnDef(MockColumn mockCol) {
+    this.mockCol = mockCol;
+    name = mockCol.getName();
+    width = TypeHelper.getSize(mockCol.getMajorType());
+    makeGenerator();
+  }
+
+  /**
+   * Create the data generator class for this column. The generator is
+   * created to match the data type by default. Or, the plan can
+   * specify a generator class (in which case the plan must ensure that
+   * the generator produces the correct value for the column data type.)
+   * The generator names a class: either a fully qualified name, or a
+   * class in this package.
+   */
+
+  private void makeGenerator() {
+    String genName = mockCol.getGenerator();
+    if (genName != null) {
+      if (! genName.contains(".")) {
+        genName = "org.apache.drill.exec.store.mock." + genName;
+      }
+      try {
+        ClassLoader cl = getClass().getClassLoader();
+        Class<?> genClass = cl.loadClass(genName);
+        generator = (FieldGen) genClass.newInstance();
+      } catch (ClassNotFoundException | InstantiationException
+          | IllegalAccessException | ClassCastException e) {
+        throw new IllegalArgumentException("Generator " + genName + " is undefined for mock field " + name);
+      }
+      generator.setup(this);
+      return;
+    }
+
+    makeDefaultGenerator();
+  }
+
+  private void makeDefaultGenerator() {
+
+    MinorType minorType = mockCol.getMinorType();
+    switch (minorType) {
+    case BIGINT:
+      break;
+    case BIT:
+      break;
+    case DATE:
+      break;
+    case DECIMAL18:
+      break;
+    case DECIMAL28DENSE:
+      break;
+    case DECIMAL28SPARSE:
+      break;
+    case DECIMAL38DENSE:
+      break;
+    case DECIMAL38SPARSE:
+      break;
+    case DECIMAL9:
+      break;
+    case FIXED16CHAR:
+      break;
+    case FIXEDBINARY:
+      break;
+    case FIXEDCHAR:
+      break;
+    case FLOAT4:
+      break;
+    case FLOAT8:
+      generator = new DoubleGen();
+      break;
+    case GENERIC_OBJECT:
+      break;
+    case INT:
+      generator = new IntGen();
+      break;
+    case INTERVAL:
+      break;
+    case INTERVALDAY:
+      break;
+    case INTERVALYEAR:
+      break;
+    case LATE:
+      break;
+    case LIST:
+      break;
+    case MAP:
+      break;
+    case MONEY:
+      break;
+    case NULL:
+      break;
+    case SMALLINT:
+      break;
+    case TIME:
+      break;
+    case TIMESTAMP:
+      break;
+    case TIMESTAMPTZ:
+      break;
+    case TIMETZ:
+      break;
+    case TINYINT:
+      break;
+    case UINT1:
+      break;
+    case UINT2:
+      break;
+    case UINT4:
+      break;
+    case UINT8:
+      break;
+    case UNION:
+      break;
+    case VAR16CHAR:
+      break;
+    case VARBINARY:
+      break;
+    case VARCHAR:
+      generator = new StringGen();
+      break;
+    default:
+      break;
+    }
+    if (generator == null) {
+      throw new IllegalArgumentException("No default column generator for column " + name + " of type " + minorType);
+    }
+    generator.setup(this);
+  }
+
+  public ColumnDef(MockColumn mockCol, int rep) {
+    this(mockCol);
+    name += Integer.toString(rep);
+  }
+
+  public MockColumn getConfig() {
+    return mockCol;
+  }
+
+  public String getName() {
+    return name;
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java
new file mode 100644
index 0000000..f7d53ed
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Random;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+
+/**
+ * Very simple date value generator that produces ISO dates
+ * uniformly distributed over the days of the last year. ISO format
+ * is: 2016-12-07.
+ * <p>
+ * There are many possible date formats; this class does not
+ * attempt to generate all of them. Drill provides a date
+ * type, but we use a string format because example cases from
+ * people using the product often read text files. Finally, we
+ * (reluctantly) use the old-style date formats instead of the
+ * new Java 8 classes because Drill prefers to build with Java 7.
+ * <p>
+ * Dates are formatted in the JVM's default time zone. The formatter
+ * is a per-instance {@link SimpleDateFormat}, which is not safe for
+ * concurrent use.
+ */
+
+public class DateGen implements FieldGen {
+
+  // Millisecond spans are held as long: one year of milliseconds
+  // (31,536,000,000) does not fit in an int and would silently wrap.
+  private static final long ONE_DAY = 24L * 60 * 60 * 1000;
+  private static final long ONE_YEAR = ONE_DAY * 365;
+
+  private final Random rand = new Random();
+  private final long baseTime;
+  private final SimpleDateFormat fmt;
+
+  public DateGen() {
+    // Start a year ago.
+    baseTime = System.currentTimeMillis() - ONE_YEAR;
+    // MM = month in year, dd = day in month. (Lower-case "mm" would be
+    // minutes and upper-case "DD" the day of the year.)
+    fmt = new SimpleDateFormat("yyyy-MM-dd");
+  }
+
+  /** No per-column configuration is used by this generator. */
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  /**
+   * @return a timestamp, in epoch milliseconds, a random whole number
+   * of days (0 to 364) after the base time of one year ago
+   */
+  private long value() {
+    return baseTime + rand.nextInt(365) * ONE_DAY;
+  }
+
+  /**
+   * Writes a random ISO-formatted date string at the given position.
+   *
+   * @param v the vector to write to; must be a {@link VarCharVector}
+   * @param index position within the vector
+   */
+  @Override
+  public void setValue(ValueVector v, int index) {
+    VarCharVector vector = (VarCharVector) v;
+    // value() is already an absolute time; do not add baseTime again.
+    String str = fmt.format(new Date(value()));
+    // Explicit charset so the bytes do not depend on the platform default.
+    vector.getMutator().setSafe(index, str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java
new file mode 100644
index 0000000..e28a394
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.Float8Vector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Generates random field values uniformly distributed over
+ * the range +-1 million, with any number of digits past
+ * the decimal point.
+ */
+
+public class DoubleGen implements FieldGen {
+
+  private final Random rand = new Random();
+
+  /** This generator takes no configuration from the column definition. */
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  /**
+   * Writes a newly drawn random double at the given position.
+   *
+   * @param v the vector to write to; must be a {@link Float8Vector}
+   * @param index position within the vector
+   */
+  @Override
+  public void setValue(ValueVector v, int index) {
+    final Float8Vector target = (Float8Vector) v;
+    target.getMutator().set(index, nextSample());
+  }
+
+  /** Scales a random fraction in [0, 1) onto the range [-1 million, +1 million). */
+  private double nextSample() {
+    final double fraction = rand.nextDouble();
+    return fraction * 2_000_000 - 1_000_000;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java
new file mode 100644
index 0000000..f3804d4
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn;
+import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockScanEntry;
+import org.apache.drill.exec.vector.AllocationHelper;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Extended form of the mock record reader that uses generator class
+ * instances to create the mock values. This is a work in progress.
+ * Generators exist for a few simple required types. One also exists
+ * to generate strings that contain dates.
+ * <p>
+ * The definition is provided inside the sub scan used to create the
+ * {@link ScanBatch} used to create this record reader.
+ */
+
+public class ExtendedMockRecordReader extends AbstractRecordReader {
+
+  private ValueVector[] valueVectors;
+  private int batchRecordCount;
+  private int recordsRead;
+
+  private final MockScanEntry config;
+  private final FragmentContext context;
+  private final ColumnDef fields[];
+
+  /**
+   * Creates a reader for one mock scan entry and expands its column
+   * list into per-vector column definitions.
+   *
+   * @param context fragment context (stored, not otherwise used here)
+   * @param config scan entry giving the record count and column types
+   * @throws IllegalArgumentException if two columns share a name, or a
+   * column definition cannot be built
+   */
+  public ExtendedMockRecordReader(FragmentContext context, MockScanEntry config) {
+    this.context = context;
+    this.config = config;
+
+    fields = buildColumnDefs();
+  }
+
+  /**
+   * Expands the configured columns into one {@link ColumnDef} per output
+   * vector. A column with a repeat count of N greater than 1 yields N
+   * definitions whose names carry the suffixes 1..N; any smaller repeat
+   * count is treated as 1 and yields a single, unsuffixed definition.
+   *
+   * @return the expanded column definitions, in plan order
+   * @throws IllegalArgumentException if a column name appears twice
+   */
+  private ColumnDef[] buildColumnDefs() {
+    List<ColumnDef> defs = new ArrayList<>();
+
+    // Look for duplicate names. Bad things happen when the same name
+    // appears twice. We must do this here because some tests create
+    // a physical plan directly, meaning that this is the first
+    // opportunity to review the column definitions.
+
+    Set<String> names = new HashSet<>();
+    for (MockColumn col : config.getTypes()) {
+      if (! names.add(col.name)) {
+        throw new IllegalArgumentException("Duplicate column name: " + col.name);
+      }
+      // At least one copy of every column. (Math.min here would cap the
+      // count at 1 and drop columns whose repeat count is 0 or less.)
+      int repeat = Math.max(1, col.getRepeatCount());
+      if (repeat == 1) {
+        defs.add(new ColumnDef(col));
+      } else {
+        for (int j = 0; j < repeat; j++) {
+          defs.add(new ColumnDef(col, j+1));
+        }
+      }
+    }
+    return defs.toArray(new ColumnDef[defs.size()]);
+  }
+
+  /**
+   * @return the sum of {@link TypeHelper#getSize} over the major types
+   * of all expanded column definitions
+   */
+  private int getEstimatedRecordSize() {
+    int size = 0;
+    for (ColumnDef field : fields) {
+      size += TypeHelper.getSize(field.getConfig().getMajorType());
+    }
+    return size;
+  }
+
+  /**
+   * Computes the batch size from the estimated row size and registers
+   * one output vector per expanded column definition.
+   *
+   * @throws ExecutionSetupException if a field cannot be added to the output
+   */
+  @Override
+  public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
+    try {
+      final int estimateRowSize = getEstimatedRecordSize();
+      // One vector per expanded definition, so that valueVectors[i]
+      // always pairs with fields[i], including repeated columns.
+      valueVectors = new ValueVector[fields.length];
+      // NOTE(review): 250000 is presumably a per-batch byte budget - confirm.
+      batchRecordCount = 250000 / estimateRowSize;
+
+      for (int i = 0; i < fields.length; i++) {
+        final ColumnDef col = fields[i];
+        final MajorType type = col.getConfig().getMajorType();
+        final MaterializedField field = MaterializedField.create(col.getName(), type);
+        final Class<? extends ValueVector> vvClass = TypeHelper.getValueVectorClass(field.getType().getMinorType(), field.getDataMode());
+        valueVectors[i] = output.addField(field, vvClass);
+      }
+    } catch (SchemaChangeException e) {
+      throw new ExecutionSetupException("Failure while setting up fields", e);
+    }
+  }
+
+  /**
+   * Fills the next batch, asking each column's generator for one value
+   * per row.
+   *
+   * @return the number of records written, or 0 once the configured
+   * record count has been produced
+   */
+  @Override
+  public int next() {
+    if (recordsRead >= this.config.getRecords()) {
+      return 0;
+    }
+
+    final int recordSetSize = Math.min(batchRecordCount, this.config.getRecords() - recordsRead);
+    recordsRead += recordSetSize;
+    for (int i = 0; i < recordSetSize; i++) {
+      for (int j = 0; j < fields.length; j++) {
+        fields[j].generator.setValue(valueVectors[j], i);
+      }
+    }
+
+    return recordSetSize;
+  }
+
+  /**
+   * Allocates every vector in the map with fixed sizing hints.
+   *
+   * @throws OutOfMemoryException if allocation fails with a
+   * NullPointerException
+   */
+  @Override
+  public void allocate(Map<String, ValueVector> vectorMap) throws OutOfMemoryException {
+    try {
+      for (final ValueVector v : vectorMap.values()) {
+        AllocationHelper.allocate(v, Character.MAX_VALUE, 50, 10);
+      }
+    } catch (NullPointerException e) {
+      // NOTE(review): mapping NPE to out-of-memory is kept as written;
+      // presumably it mirrors the original mock reader - confirm.
+      throw new OutOfMemoryException();
+    }
+  }
+
+  @Override
+  public void close() { }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java
new file mode 100644
index 0000000..b51077f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Interface which all mock column data generators must implement. (This is
+ * test code; we're not overly concerned with the overhead of vector casts.)
+ */
+public interface FieldGen {
+  /**
+   * Receives the column definition for the column, which may have additional
+   * configuration information (column width, precision, etc.).
+   */
+  void setup(ColumnDef colDef);
+  /**
+   * Sets position {@code index} of vector {@code v} to a generated value. The
+   * implementation is expected to cast the vector to the type it supports.
+   */
+  void setValue(ValueVector v, int index);
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java
new file mode 100644
index 0000000..be00541
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.IntVector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Mock data generator for columns backed by an {@link IntVector}. Values
+ * are uniformly randomly distributed over the entire 32-bit integer range,
+ * from {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE}.
+ */
+
+public class IntGen implements FieldGen {
+
+  private final Random rand = new Random();
+
+  /** Integers need no per-column configuration, so this does nothing. */
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  /** Writes a freshly generated random integer at the given vector position. */
+  @Override
+  public void setValue(ValueVector v, int index) {
+    // The vector must be an IntVector; any other type fails the cast.
+    final IntVector intVector = (IntVector) v;
+    final int generated = rand.nextInt();
+    intVector.getMutator().set(index, generated);
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
index bb71c31..2e8af42 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,17 +17,17 @@
  */
 package org.apache.drill.exec.store.mock;
 
+import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.expr.TypeHelper;
-import org.apache.drill.exec.physical.EndpointAffinity;
 import org.apache.drill.exec.physical.base.AbstractGroupScan;
 import org.apache.drill.exec.physical.base.GroupScan;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
@@ -43,21 +43,50 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.annotation.JsonTypeName;
 import com.google.common.base.Preconditions;
 
+/**
+ * Describes a "group" scan of a (logical) mock table. The mock table has a
+ * schema described by the {@link MockScanEntry} class. To simulate a scan that
+ * can be parallelized, this group scan can contain a list of
+ * {@link MockScanEntry}, each of which simulates a separate file on disk, or
+ * block within a file. Each will give rise to a separate minor fragment
+ * (assuming sufficient parallelization.)
+ */
+
 @JsonTypeName("mock-scan")
 public class MockGroupScanPOP extends AbstractGroupScan {
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockGroupScanPOP.class);
+  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory
+      .getLogger(MockGroupScanPOP.class);
 
+  /**
+   * URL for the scan. Unused. Appears to be a vestige of an earlier design that
+   * required them.
+   */
   private final String url;
+
+  /**
+   * The set of simulated files to scan.
+   */
   protected final List<MockScanEntry> readEntries;
-  private  LinkedList<MockScanEntry>[] mappings;
+  private LinkedList<MockScanEntry>[] mappings;
+
+  /**
+   * Whether this group scan uses a newer "extended" schema definition, or the
+   * original (non-extended) definition.
+   */
+
+  private boolean extended;
 
   @JsonCreator
-  public MockGroupScanPOP(@JsonProperty("url") String url, @JsonProperty("entries") List<MockScanEntry> readEntries) {
-    super((String)null);
+  public MockGroupScanPOP(@JsonProperty("url") String url,
+      @JsonProperty("extended") Boolean extended,
+      @JsonProperty("entries") List<MockScanEntry> readEntries) {
+    super((String) null);
     this.readEntries = readEntries;
     this.url = url;
+    this.extended = extended == null ? false : extended;
   }
 
+  @Override
   public ScanStats getScanStats() {
     return ScanStats.TRIVIAL_TABLE;
   }
@@ -71,22 +100,22 @@ public class MockGroupScanPOP extends AbstractGroupScan {
     return readEntries;
   }
 
-  public static class MockScanEntry{
+  /**
+   * Describes one simulated file (or block) within the logical file scan
+   * described by this group scan. Each block can have a distinct schema to test
+   * for schema changes.
+   */
+
+  public static class MockScanEntry {
 
     private final int records;
     private final MockColumn[] types;
-    private final int recordSize;
-
 
     @JsonCreator
-    public MockScanEntry(@JsonProperty("records") int records, @JsonProperty("types") MockColumn[] types) {
+    public MockScanEntry(@JsonProperty("records") int records,
+        @JsonProperty("types") MockColumn[] types) {
       this.records = records;
       this.types = types;
-      int size = 0;
-      for (MockColumn dt : types) {
-        size += TypeHelper.getSize(dt.getMajorType());
-      }
-      this.recordSize = size;
     }
 
     public int getRecords() {
@@ -99,50 +128,110 @@ public class MockGroupScanPOP extends AbstractGroupScan {
 
     @Override
     public String toString() {
-      return "MockScanEntry [records=" + records + ", columns=" + Arrays.toString(types) + "]";
+      return "MockScanEntry [records=" + records + ", columns="
+          + Arrays.toString(types) + "]";
     }
   }
 
+  /**
+   * Meta-data description of the columns we wish to create during a simulated
+   * scan.
+   */
+
   @JsonInclude(Include.NON_NULL)
-  public static class MockColumn{
-    @JsonProperty("type") public MinorType minorType;
+  public static class MockColumn {
+
+    /**
+     * Column type given as a Drill minor type (that is, a type without the
+     * extra information such as cardinality, width, etc.)
+     */
+
+    @JsonProperty("type")
+    public MinorType minorType;
     public String name;
     public DataMode mode;
     public Integer width;
     public Integer precision;
     public Integer scale;
 
+    /**
+     * The scan can request to use a specific data generator class. The name of
+     * that class appears here. The name can be a simple class name, if that
+     * class resides in this Java package. Or, it can be a fully qualified name
+     * of a class that resides elsewhere. If null, the default generator for the
+     * data type is used.
+     */
+
+    public String generator;
+
+    /**
+     * Some tests want to create a very wide row with many columns. This field
+     * eases that task: specify a value other than 1 and the data source will
+     * generate that many copies of the column, each with separately generated
+     * random values. For example, to create 20 copies of field, "foo", set
+     * repeat to 20 and the actual generated batches will contain fields
+     * foo1, foo2, ... foo20.
+     */
+
+    public Integer repeat;
 
     @JsonCreator
-    public MockColumn(@JsonProperty("name") String name, @JsonProperty("type") MinorType minorType, @JsonProperty("mode") DataMode mode, @JsonProperty("width") Integer width, @JsonProperty("precision") Integer precision, @JsonProperty("scale") Integer scale) {
+    public MockColumn(@JsonProperty("name") String name,
+        @JsonProperty("type") MinorType minorType,
+        @JsonProperty("mode") DataMode mode,
+        @JsonProperty("width") Integer width,
+        @JsonProperty("precision") Integer precision,
+        @JsonProperty("scale") Integer scale,
+        @JsonProperty("generator") String generator,
+        @JsonProperty("repeat") Integer repeat) {
       this.name = name;
       this.minorType = minorType;
       this.mode = mode;
       this.width = width;
       this.precision = precision;
       this.scale = scale;
+      this.generator = generator;
+      this.repeat = repeat;
     }
 
     @JsonProperty("type")
     public MinorType getMinorType() {
       return minorType;
     }
+
     public String getName() {
       return name;
     }
+
     public DataMode getMode() {
       return mode;
     }
+
     public Integer getWidth() {
       return width;
     }
+
     public Integer getPrecision() {
       return precision;
     }
+
     public Integer getScale() {
       return scale;
     }
 
+    public String getGenerator() {
+      return generator;
+    }
+
+    public Integer getRepeat() {
+      return repeat;
+    }
+
+    @JsonIgnore
+    public int getRepeatCount() {
+      return repeat == null ? 1 : repeat;
+    }
+
     @JsonIgnore
     public MajorType getMajorType() {
       MajorType.Builder b = MajorType.newBuilder();
@@ -162,9 +251,9 @@ public class MockGroupScanPOP extends AbstractGroupScan {
 
     @Override
     public String toString() {
-      return "MockColumn [minorType=" + minorType + ", name=" + name + ", mode=" + mode + "]";
+      return "MockColumn [minorType=" + minorType + ", name=" + name + ", mode="
+          + mode + "]";
     }
-
   }
 
   @SuppressWarnings("unchecked")
@@ -174,7 +263,7 @@ public class MockGroupScanPOP extends AbstractGroupScan {
 
     mappings = new LinkedList[endpoints.size()];
 
-    int i =0;
+    int i = 0;
     for (MockScanEntry e : this.getReadEntries()) {
       if (i == endpoints.size()) {
         i -= endpoints.size();
@@ -191,8 +280,10 @@ public class MockGroupScanPOP extends AbstractGroupScan {
 
   @Override
   public SubScan getSpecificScan(int minorFragmentId) {
-    assert minorFragmentId < mappings.length : String.format("Mappings length [%d] should be longer than minor fragment id [%d] but it isn't.", mappings.length, minorFragmentId);
-    return new MockSubScanPOP(url, mappings[minorFragmentId]);
+    assert minorFragmentId < mappings.length : String.format(
+        "Mappings length [%d] should be longer than minor fragment id [%d] but it isn't.",
+        mappings.length, minorFragmentId);
+    return new MockSubScanPOP(url, extended, mappings[minorFragmentId]);
   }
 
   @Override
@@ -204,13 +295,60 @@ public class MockGroupScanPOP extends AbstractGroupScan {
   @JsonIgnore
   public PhysicalOperator getNewWithChildren(List<PhysicalOperator> children) {
     Preconditions.checkArgument(children.isEmpty());
-    return new MockGroupScanPOP(url, readEntries);
-
+    return new MockGroupScanPOP(url, extended, readEntries);
   }
 
   @Override
   public GroupScan clone(List<SchemaPath> columns) {
-    return this;
+    if (columns.isEmpty()) {
+      throw new IllegalArgumentException("No columns for mock scan");
+    }
+    List<MockColumn> mockCols = new ArrayList<>();
+    Pattern p = Pattern.compile("(\\w+)_([isd])(\\d*)"); // column names look like <name>_<type><optional width>
+    for (SchemaPath path : columns) {
+      String col = path.getLastSegment().getNameSegment().getPath();
+      if (col.equals("*")) {
+        return this; // wildcard projection: keep the scan as it is
+      }
+      Matcher m = p.matcher(col);
+      if (!m.matches()) {
+        throw new IllegalArgumentException(
+            "Badly formatted mock column name: " + col);
+      }
+      @SuppressWarnings("unused")
+      String name = m.group(1);
+      String type = m.group(2);
+      String length = m.group(3);
+      int width = 10; // default width when the column name carries no digits
+      if (!length.isEmpty()) {
+        width = Integer.parseInt(length);
+      }
+      MinorType minorType;
+      switch (type) { // single-letter type code taken from the column name
+      case "i":
+        minorType = MinorType.INT;
+        break;
+      case "s":
+        minorType = MinorType.VARCHAR;
+        break;
+      case "d":
+        minorType = MinorType.FLOAT8;
+        break;
+      default:
+        throw new IllegalArgumentException(
+            "Unsupported field type " + type + " for mock column " + col);
+      }
+      MockColumn mockCol = new MockColumn(col, minorType, DataMode.REQUIRED,
+          width, 0, 0, null, 1); // full column name is kept; no custom generator, repeat of 1
+      mockCols.add(mockCol);
+    }
+    MockScanEntry entry = readEntries.get(0); // only the first entry's record count is carried over
+    MockColumn types[] = new MockColumn[mockCols.size()];
+    mockCols.toArray(types);
+    MockScanEntry newEntry = new MockScanEntry(entry.records, types);
+    List<MockScanEntry> newEntries = new ArrayList<>();
+    newEntries.add(newEntry);
+    return new MockGroupScanPOP(url, true, newEntries); // the projected scan is marked as extended
   }
 
   @Override
@@ -220,8 +358,13 @@ public class MockGroupScanPOP extends AbstractGroupScan {
 
   @Override
   public String toString() {
-    return "MockGroupScanPOP [url=" + url
-        + ", readEntries=" + readEntries + "]";
+    return "MockGroupScanPOP [url=" + url + ", readEntries=" + readEntries
+        + "]";
   }
 
+  @Override
+  @JsonIgnore
+  public boolean canPushdownProjects(List<SchemaPath> columns) {
+    return true;
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
index ed3decb..6f8cb39 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -20,7 +20,6 @@ package org.apache.drill.exec.store.mock;
 import java.util.Map;
 
 import org.apache.drill.common.exceptions.ExecutionSetupException;
-import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos.MajorType;
 import org.apache.drill.exec.exception.OutOfMemoryException;
 import org.apache.drill.exec.exception.SchemaChangeException;
@@ -43,9 +42,9 @@ public class MockRecordReader extends AbstractRecordReader {
   private ValueVector[] valueVectors;
   private int recordsRead;
   private int batchRecordCount;
+  @SuppressWarnings("unused")
   private OperatorContext operatorContext;
 
-
   public MockRecordReader(FragmentContext context, MockScanEntry config) {
     this.context = context;
     this.config = config;
@@ -111,6 +110,5 @@ public class MockRecordReader extends AbstractRecordReader {
   }
 
   @Override
-  public void close() {
-  }
+  public void close() { }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
index 6cdbc3c..9cdb7ad 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -40,7 +40,11 @@ public class MockScanBatchCreator implements BatchCreator<MockSubScanPOP> {
     final List<MockScanEntry> entries = config.getReadEntries();
     final List<RecordReader> readers = Lists.newArrayList();
     for(final MockScanEntry e : entries) {
-      readers.add(new MockRecordReader(context, e));
+      if ( config.isExtended( ) ) {
+        readers.add(new ExtendedMockRecordReader(context, e));
+      } else {
+        readers.add(new MockRecordReader(context, e));
+      }
     }
     return new ScanBatch(config, context, readers.iterator());
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
index d68fd52..df8ee50 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -19,44 +19,64 @@ package org.apache.drill.exec.store.mock;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.calcite.schema.SchemaPlus;
-
+import org.apache.calcite.schema.Table;
 import org.apache.drill.common.JSONOptions;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.logical.StoragePluginConfig;
 import org.apache.drill.exec.physical.base.AbstractGroupScan;
+import org.apache.drill.exec.planner.logical.DynamicDrillTable;
 import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.AbstractSchema;
 import org.apache.drill.exec.store.AbstractStoragePlugin;
 import org.apache.drill.exec.store.SchemaConfig;
 import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockScanEntry;
 
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
 
 public class MockStorageEngine extends AbstractStoragePlugin {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockStorageEngine.class);
 
   private final MockStorageEngineConfig configuration;
+  private final MockSchema schema;
 
   public MockStorageEngine(MockStorageEngineConfig configuration, DrillbitContext context, String name) {
     this.configuration = configuration;
+    this.schema = new MockSchema(this);
   }
 
   @Override
   public AbstractGroupScan getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns)
       throws IOException {
 
-    ArrayList<MockScanEntry> readEntries = selection.getListWith(new ObjectMapper(),
+    List<MockScanEntry> readEntries = selection.getListWith(new ObjectMapper(),
         new TypeReference<ArrayList<MockScanEntry>>() {
         });
 
-    return new MockGroupScanPOP(null, readEntries);
+    // The classic (logical-plan based) and extended (SQL-based) paths
+    // come through here. If this is a SQL query, then no columns are
+    // defined in the plan.
+
+    assert ! readEntries.isEmpty();
+    boolean extended = readEntries.size() == 1;
+    if (extended) {
+      MockScanEntry entry = readEntries.get(0);
+      extended = entry.getTypes() == null;
+    }
+    return new MockGroupScanPOP(null, extended, readEntries);
   }
 
   @Override
   public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
+    parent.add(schema.getName(), schema);
   }
 
   @Override
@@ -64,5 +84,56 @@ public class MockStorageEngine extends AbstractStoragePlugin {
     return configuration;
   }
 
+  @Override
+  public boolean supportsRead() {
+    return true;
+  }
+
+//  public static class ImplicitTable extends DynamicDrillTable {
+//
+//    public ImplicitTable(StoragePlugin plugin, String storageEngineName,
+//        Object selection) {
+//      super(plugin, storageEngineName, selection);
+//    }
+//
+//  }
+
+  private static class MockSchema extends AbstractSchema {
 
+    private MockStorageEngine engine;
+
+    public MockSchema(MockStorageEngine engine) {
+      super(ImmutableList.<String>of(), MockStorageEngineConfig.NAME);
+      this.engine = engine;
+    }
+
+    /** Resolves names of the form {@code <base>_<rows>[k|m]} to a mock table with that row count. */
+    @Override
+    public Table getTable(String name) {
+      Pattern p = Pattern.compile("(\\w+)_(\\d+)(k|m)?", Pattern.CASE_INSENSITIVE);
+      Matcher m = p.matcher(name);
+      if (! m.matches()) {
+        return null; // not a mock table name
+      }
+      int n = Integer.parseInt(m.group(2));
+      // The k/m suffix is optional, so group 3 may be null; compare constant-first to avoid an NPE.
+      String unit = m.group(3);
+      if ("K".equalsIgnoreCase(unit)) { n *= 1000; }
+      else if ("M".equalsIgnoreCase(unit)) { n *= 1_000_000; }
+      MockScanEntry entry = new MockScanEntry(n, null);
+      List<MockScanEntry> list = new ArrayList<>();
+      list.add(entry);
+      return new DynamicDrillTable(engine, this.name, list);
+    }
+
+    @Override
+    public Set<String> getTableNames() {
+      return new HashSet<>();
+    }
+
+    @Override
+    public String getTypeName() {
+      return MockStorageEngineConfig.NAME;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
index 2f7ea18..f20ff45 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -24,13 +24,12 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.annotation.JsonTypeName;
 
 @JsonTypeName(MockStorageEngineConfig.NAME)
-public class MockStorageEngineConfig extends StoragePluginConfigBase{
-
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockStorageEngineConfig.class);
+public class MockStorageEngineConfig extends StoragePluginConfigBase {
 
   private String url;
 
   public static final String NAME = "mock";
+  public static final MockStorageEngineConfig INSTANCE = new MockStorageEngineConfig("mock:///");
 
   @JsonCreator
   public MockStorageEngineConfig(@JsonProperty("url") String url) {
@@ -41,7 +40,6 @@ public class MockStorageEngineConfig extends StoragePluginConfigBase{
     return url;
   }
 
-
   @Override
   public boolean equals(Object o) {
     if (this == o) {
@@ -64,5 +62,4 @@ public class MockStorageEngineConfig extends StoragePluginConfigBase{
   public int hashCode() {
     return url != null ? url.hashCode() : 0;
   }
-
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
index 4c12d57..9fee5c7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -39,6 +39,7 @@ public class MockStorePOP extends AbstractStore {
     super(child);
   }
 
+  @Override
   public int getMaxWidth() {
     return 1;
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
index 705452d..f169f51 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -18,7 +18,6 @@
 package org.apache.drill.exec.store.mock;
 
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.drill.exec.physical.base.AbstractBase;
@@ -34,18 +33,42 @@ import com.fasterxml.jackson.annotation.JsonTypeName;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 
+/**
+ * Describes a physical scan operation for the mock data source. Each operator
+ * can, in general, give rise to one or more actual scans. For the mock data
+ * source, each sub-scan does exactly one (simulated) scan.
+ */
+
 @JsonTypeName("mock-sub-scan")
 public class MockSubScanPOP extends AbstractBase implements SubScan {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockGroupScanPOP.class);
 
   private final String url;
   protected final List<MockGroupScanPOP.MockScanEntry> readEntries;
-//  private final OperatorCost cost;
-//  private final Size size;
-  private  LinkedList<MockGroupScanPOP.MockScanEntry>[] mappings;
+  private final boolean extended;
+
+  /**
+   * This constructor is called from Jackson and is designed to support both
+   * older physical plans and the newer ("extended") plans. Jackson will fill
+   * in a null value for the <tt>extended</tt> field for older plans; we use
+   * that null value to know that the plan is old, thus not extended. Newer
+   * plans simply provide the value.
+   *
+   * @param url
+   *          not used for the mock plan, appears to be a vestige of creating
+   *          this from a file-based plugin. Must keep it because older physical
+   *          plans contained a dummy URL value.
+   * @param extended
+   *          see above
+   * @param readEntries
+   *          a description of the columns to generate in a Jackson-serialized
+   *          form unique to the mock data source plugin.
+   */
 
   @JsonCreator
-  public MockSubScanPOP(@JsonProperty("url") String url, @JsonProperty("entries") List<MockGroupScanPOP.MockScanEntry> readEntries) {
+  public MockSubScanPOP(@JsonProperty("url") String url,
+                        @JsonProperty("extended") Boolean extended,
+                        @JsonProperty("entries") List<MockGroupScanPOP.MockScanEntry> readEntries) {
     this.readEntries = readEntries;
 //    OperatorCost cost = new OperatorCost(0,0,0,0);
 //    Size size = new Size(0,0);
@@ -56,11 +79,11 @@ public class MockSubScanPOP extends AbstractBase implements SubScan {
 //    this.cost = cost;
 //    this.size = size;
     this.url = url;
+    this.extended = extended == null ? false : extended;
   }
 
-  public String getUrl() {
-    return url;
-  }
+  public String getUrl() { return url; }
+  public boolean isExtended() { return extended; }
 
   @JsonProperty("entries")
   public List<MockGroupScanPOP.MockScanEntry> getReadEntries() {
@@ -88,7 +111,7 @@ public class MockSubScanPOP extends AbstractBase implements SubScan {
   @JsonIgnore
   public PhysicalOperator getNewWithChildren(List<PhysicalOperator> children) {
     Preconditions.checkArgument(children.isEmpty());
-    return new MockSubScanPOP(url, readEntries);
+    return new MockSubScanPOP(url, extended, readEntries);
 
   }
 
@@ -96,5 +119,4 @@ public class MockSubScanPOP extends AbstractBase implements SubScan {
   public int getOperatorType() {
     return CoreOperatorType.MOCK_SUB_SCAN_VALUE;
   }
-
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java
new file mode 100644
index 0000000..d4e2379
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.Float8Vector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Generates a mock money field as a double over the range 0
+ * to 1 million. Values include cents. That is, the value
+ * ranges uniformly over the range 0.00 to
+ * 999,999.99.
+ */
+
+public class MoneyGen implements FieldGen {
+
+  private final Random rand = new Random();
+
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  private double value() {
+    return Math.ceil(rand.nextDouble() * 1_000_000 * 100) / 100;
+  }
+
+  @Override
+  public void setValue(ValueVector v, int index) {
+    Float8Vector vector = (Float8Vector) v;
+    vector.getMutator().set(index, value());
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java
new file mode 100644
index 0000000..72be10f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+
+/**
+ * Generates a mock string field of the given length. Fields are composed
+ * of upper case letters uniformly distributed from A to Z, and repeated
+ * for the length of the field. Example for a 4-character field:
+ * DDDD, MMMM, AAAA, RRRR, ...
+ */
+
+public class StringGen implements FieldGen {
+
+  private final Random rand = new Random();
+  private int length;
+
+  @Override
+  public void setup(ColumnDef colDef) {
+    length = colDef.width;
+  }
+
+  private String value() {
+    String c = Character.toString((char) (rand.nextInt(26) + 'A'));
+    StringBuilder buf = new StringBuilder();
+    for (int i = 0; i < length; i++) {
+      buf.append(c);
+    }
+    return buf.toString();
+  }
+
+  @Override
+  public void setValue(ValueVector v, int index) {
+    VarCharVector vector = (VarCharVector) v;
+    vector.getMutator().setSafe(index, value().getBytes());
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java
new file mode 100644
index 0000000..e99cfc5
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Defines a mock data source which generates dummy test data for use
+ * in testing. The data source operates in two modes:
+ * <ul>
+ * <li><b>Classic:</b> used in physical plans in many unit tests.
+ * The plan specifies a set of columns; data is generated by the
+ * vectors themselves based on two alternating values.</li>
+ * <li><b>Enhanced:</b> available for use in newer unit tests.
+ * Enhances the physical plan description to allow specifying a data
+ * generator class (for various types, data formats, etc.) Also
+ * provides a data storage engine framework to allow using mock
+ * tables in SQL queries.</li>
+ * </ul>
+ * <h3>Classic Mode</h3>
+ * Create a scan operator that looks like the following (from
+ * <tt></tt>):
+ * <pre><code>
+ *    graph:[
+ *      {
+ *        {@literal @}id:1,
+ *        pop:"mock-scan",
+ *        url: "http://apache.org",
+ *        entries:[
+ *          {records: 1000000, types: [
+ *             {name: "blue", type: "INT", mode: "REQUIRED"},
+ *             {name: "green", type: "INT", mode: "REQUIRED"}
+ *        ]}
+ *      ]
+ *    }, ...
+ * </code></pre>
+ * Here:
+ * <ul>
+ * <li>The <tt>pop</tt> must be <tt>mock-scan</tt>.</li>
+ * <li>The <tt>url</tt> is unused.</li>
+ * <li>The <tt>entries</tt> section can have one or more entries. If
+ * more than one entry, the storage engine will enable parallel scans
+ * up to the number of entries, as though each entry was a different
+ * file or group.</li>
+ * <li>The entry <tt>name</tt> is arbitrary, though color names seem
+ * to be the traditional names used in Drill tests.</li>
+ * <li>The <tt>type</tt> is one of the supported Drill
+ * {@link MinorType} names.</li>
+ * <li>The <tt>mode</tt> is one of the supported Drill
+ * {@link DataMode} names: usually <tt>OPTIONAL</tt> or <tt>REQUIRED</tt>.</li>
+ * </ul>
+ *
+ * <h3>Enhanced Mode</h3>
+ * Enhanced builds on the Classic mode to add additional capabilities.
+ * Enhanced mode can be used either in a physical plan or in SQL. Data
+ * is randomly generated over a wide range of values and can be
+ * controlled by custom generator classes. When
+ * in a physical plan, the <tt>records</tt> section has additional
+ * attributes as described in {@link MockGroupScanPOP.MockColumn}:
+ * <ul>
+ * <li>The <tt>generator</tt> lets you specify a class to generate the
+ * sample data. The rule for the class name is that it can either contain
+ * a full package path, or just a class name. If just a class name, the
+ * class is assumed to reside in this package. For example, to generate
+ * an ISO date into a string, use <tt>DateGen</tt>. Additional generators
+ * can (and should) be added as the need arises.</li>
+ * <li>The <tt>repeat</tt> attribute lets you create a very wide row by
+ * repeating a column the specified number of times. Actual column names
+ * have a numeric suffix. For example, if the base name is "blue" and
+ * is repeated twice, actual columns are "blue1" and "blue2".</li>
+ * </ul>
+ * When used in SQL, use the <tt>mock</tt> name space as follows:
+ * <pre><code>
+ * SELECT id_i, name_s50 FROM `mock`.`employee_500`;
+ * </code></pre>
+ * Both the column names and table names encode information that specifies
+ * what data to generate.
+ * <p>
+ * Columns are of the form <tt><i>name</i>_<i>type</i><i>length</i>?</tt>.
+ * <ul>
+ * <li>The name is anything you want ("id" and "name" in the example.)</li>
+ * <li>The underscore is required to separate the type from the name.</li>
+ * <li>The type is one of "i" (integer), "d" (double) or "s" (string).
+ * Other types can be added as needed: n (decimal number), l (long), etc.</li>
+ * <li>The length is optional and is used only for string (<tt>VARCHAR</tt>)
+ * columns. The default string length is 10.</li>
+ * <li>Columns do not yet support nulls. When they do, the encoding will
+ * be "_n<i>percent</i>" where the percent specifies the percent of rows
+ * that should contain null values in this column.</li>
+ * <li>The column is known to SQL as its full name, that is "id_i" or
+ * "name_s50".</li>
+ * </ul>
+ * <p>
+ * Tables are of the form <tt><i>name</i>_<i>rows</i><i>unit</i>?</tt> where:
+ * <ul>
+ * <li>The name is anything you want. ("employee" in the example.)</li>
+ * <li>The underscore is required to separate the row count from the name.</li>
+ * <li>The row count specifies the number of rows to return.</li>
+ * <li>The count unit can be none, K (multiply count by 1000) or M
+ * (multiply row count by one million), case insensitive.</li>
+ * <li>Another field (not yet implemented) might specify the split count.</li>
+ * </ul>
+ * <h3>Data Generators</h3>
+ * The classic mode uses data generators built into each vector to generate
+ * the sample data. These generators use a very simple black/white alternating
+ * series of two values. Simple, but limited. The enhanced mode allows custom
+ * data generators. Unfortunately, this requires a separate generator class for
+ * each data type. As a result, we presently support just a few key data types.
+ * On the other hand, the custom generators do allow tests to specify a custom
+ * generator class to generate the kind of data needed for that test.
+ * <p>
+ * All data generators implement the {@link FieldGen} interface, and must have
+ * a no-argument constructor to allow dynamic instantiation. The mock data
+ * source either picks a default generator (if no <tt>generator</tt> is provided)
+ * or uses the custom generator specified in <tt>generator</tt>. Generators
+ * are independent (though one could, perhaps, write generators that correlate
+ * field values.)
+ */
+package org.apache.drill.exec.store.mock;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java b/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
index f4fe2da..7237183 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -47,7 +47,7 @@ public class TestOpSerialization {
   public void testSerializedDeserialize() throws Throwable {
     DrillConfig c = DrillConfig.create();
     PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
-    MockSubScanPOP s = new MockSubScanPOP("abc", null);
+    MockSubScanPOP s = new MockSubScanPOP("abc", false, null);
     s.setOperatorId(3);
     Filter f = new Filter(s, new ValueExpressions.BooleanExpression("true", ExpressionPosition.UNKNOWN), 0.1f);
     f.setOperatorId(2);

http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
index 722d45e..e191d35 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -14,7 +14,7 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- ******************************************************************************/
+ */
 package org.apache.drill.exec.fn.interp;
 
 import static org.junit.Assert.assertEquals;
@@ -22,15 +22,10 @@ import static org.junit.Assert.assertEquals;
 import java.nio.ByteBuffer;
 import java.util.List;
 
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.CommonTokenStream;
-import org.antlr.runtime.RecognitionException;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.expression.ErrorCollector;
 import org.apache.drill.common.expression.ErrorCollectorImpl;
 import org.apache.drill.common.expression.LogicalExpression;
-import org.apache.drill.common.expression.parser.ExprLexer;
-import org.apache.drill.common.expression.parser.ExprParser;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.common.util.DrillStringUtils;
@@ -154,7 +149,9 @@ public class ExpressionInterpreterTest  extends PopUnitTestBase {
   }
 
   protected void doTest(String expressionStr, String[] colNames, TypeProtos.MajorType[] colTypes, String[] expectFirstTwoValues, BitControl.PlanFragment planFragment) throws Exception {
+    @SuppressWarnings("resource")
     final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
+    @SuppressWarnings("resource")
     final Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
 
     bit1.run();
@@ -165,16 +162,18 @@ public class ExpressionInterpreterTest  extends PopUnitTestBase {
     final MockGroupScanPOP.MockColumn[] columns = new MockGroupScanPOP.MockColumn[colNames.length];
 
     for (int i = 0; i < colNames.length; i++ ) {
-      columns[i] = new MockGroupScanPOP.MockColumn(colNames[i], colTypes[i].getMinorType(), colTypes[i].getMode(),0,0,0);
+      columns[i] = new MockGroupScanPOP.MockColumn(colNames[i], colTypes[i].getMinorType(), colTypes[i].getMode(), 0, 0, 0, null, null);
     }
 
     final MockGroupScanPOP.MockScanEntry entry = new MockGroupScanPOP.MockScanEntry(10, columns);
-    final MockSubScanPOP scanPOP = new MockSubScanPOP("testTable", java.util.Collections.singletonList(entry));
+    final MockSubScanPOP scanPOP = new MockSubScanPOP("testTable", false, java.util.Collections.singletonList(entry));
 
+    @SuppressWarnings("resource")
     final ScanBatch batch = createMockScanBatch(bit1, scanPOP, planFragment);
 
     batch.next();
 
+    @SuppressWarnings("resource")
     final ValueVector vv = evalExprWithInterpreter(expressionStr, batch, bit1);
 
     // Verify the first 2 values in the output of evaluation.
@@ -190,6 +189,7 @@ public class ExpressionInterpreterTest  extends PopUnitTestBase {
     bit1.close();
   }
 
+  @SuppressWarnings("resource")
   private ScanBatch createMockScanBatch(Drillbit bit, MockSubScanPOP scanPOP, BitControl.PlanFragment planFragment) {
     final List<RecordBatch> children = Lists.newArrayList();
     final MockScanBatchCreator creator = new MockScanBatchCreator();

[2/4] drill git commit: DRILL-5105: comment out unecessary recursive buffer size check

Posted by pa...@apache.org.

DRILL-5105: comment out unecessary recursive buffer size check

This closes #715


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/77e50100
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/77e50100
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/77e50100

Branch: refs/heads/master
Commit: 77e501005bbfe1d5736feab78d8d6e2f495eb8a3
Parents: eef3b3f
Author: chunhui-shi <cs...@maprtech.com>
Authored: Tue Jan 3 17:39:49 2017 -0800
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:44:40 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/drill/exec/vector/complex/MapVector.java  | 7 +++----
 .../apache/drill/exec/vector/complex/RepeatedMapVector.java   | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/77e50100/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
index e76e674..8447893 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
@@ -134,10 +134,9 @@ public class MapVector extends AbstractMapVector {
 
   @Override
   public DrillBuf[] getBuffers(boolean clear) {
-    int expectedSize = getBufferSize();
-    int actualSize   = super.getBufferSize();
-
-    Preconditions.checkArgument(expectedSize == actualSize);
+    //int expectedSize = getBufferSize();
+    //int actualSize   = super.getBufferSize();
+    //Preconditions.checkArgument(expectedSize == actualSize);
     return super.getBuffers(clear);
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/77e50100/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
index 9839b2e..94cf4a6 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
@@ -413,10 +413,9 @@ public class RepeatedMapVector extends AbstractMapVector
 
   @Override
   public DrillBuf[] getBuffers(boolean clear) {
-    final int expectedBufferSize = getBufferSize();
-    final int actualBufferSize = super.getBufferSize();
-
-    Preconditions.checkArgument(expectedBufferSize == actualBufferSize + offsets.getBufferSize());
+    //final int expectedBufferSize = getBufferSize();
+    //final int actualBufferSize = super.getBufferSize();
+    //Preconditions.checkArgument(expectedBufferSize == actualBufferSize + offsets.getBufferSize());
     return ArrayUtils.addAll(offsets.getBuffers(clear), super.getBuffers(clear));
   }

[4/4] drill git commit: DRILL-4919: Fix select count(1) / count(*) on csv with header

Posted by pa...@apache.org.

DRILL-4919: Fix select count(1) / count(*) on csv with header

This closes #714


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34969583
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34969583
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34969583

Branch: refs/heads/master
Commit: 34969583bfab410c80cb14a1c20249f097d5f7a7
Parents: 535623b
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Thu Dec 29 15:42:53 2016 +0000
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:46:13 2017 -0800

----------------------------------------------------------------------
 .../compliant/CompliantTextRecordReader.java    | 18 +++++++++++++++-
 .../drill/exec/store/text/TestCsvHeader.java    | 22 ++++++++++++++++++--
 2 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index d324270..ac4abb9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.easy.text.compliant;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Maps;
 import com.univocity.parsers.common.TextParsingException;
 import io.netty.buffer.DrillBuf;
@@ -51,8 +52,12 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CompliantTextRecordReader.class);
 
   private static final int MAX_RECORDS_PER_BATCH = 8096;
-  static final int READ_BUFFER = 1024*1024;
+  private static final int READ_BUFFER = 1024*1024;
   private static final int WHITE_SPACE_BUFFER = 64*1024;
+  // When no named column is required, ask SCAN to return a DEFAULT column.
+  // If such column does not exist, it will be returned as a nullable-int column.
+  private static final List<SchemaPath> DEFAULT_NAMED_TEXT_COLS_TO_READ =
+      ImmutableList.of(SchemaPath.getSimplePath("_DEFAULT_COL_TO_READ_"));
 
   // settings to be used while parsing
   private TextParsingSettings settings;
@@ -89,8 +94,19 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
     return super.isStarQuery();
   }
 
+  /**
+   * Returns list of default columns to read to replace empty list of columns.
+   * For text files without headers returns "columns[0]".
+   * Text files with headers do not support columns syntax,
+   * so when header extraction is enabled, returns fake named column "_DEFAULT_COL_TO_READ_".
+   *
+   * @return list of default columns to read
+   */
   @Override
   protected List<SchemaPath> getDefaultColumnsToRead() {
+    if (settings.isHeaderExtractionEnabled()) {
+      return DEFAULT_NAMED_TEXT_COLS_TO_READ;
+    }
     return DEFAULT_TEXT_COLS_TO_READ;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
index a2e548b..cf54bb0 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.text;
 
+import com.google.common.collect.Lists;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.TestBuilder;
 import org.apache.drill.common.util.FileUtils;
@@ -24,14 +25,14 @@ import org.apache.drill.common.util.FileUtils;
 import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
+import java.util.List;
 
 import org.junit.Before;
 import org.junit.Test;
 
 public class TestCsvHeader extends BaseTestQuery{
 
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCsvHeader.class);
-  String root;
+  private String root;
 
   @Before
   public void initialize() throws Exception {
@@ -185,4 +186,21 @@ public class TestCsvHeader extends BaseTestQuery{
       }
       builder.go();
   }
+
+  @Test
+  public void testCountOnCsvWithHeader() throws Exception {
+    final String query = "select count(%s) as cnt from %s.`%s`";
+    final List<Object> options = Lists.<Object>newArrayList("*", 1, "'A'");
+
+    for (Object option : options) {
+      testBuilder()
+          .sqlQuery(query, option, TEMP_SCHEMA, root)
+          .unOrdered()
+          .baselineColumns("cnt")
+          .baselineValues(4L)
+          .build()
+          .run();
+    }
+  }
+
 }