You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pa...@apache.org on 2017/01/14 01:47:44 UTC
[1/4] drill git commit: DRILL-4996: Parquet Date auto-correction is
not working in auto-partitioned parquet files generated by drill-1.6
Repository: drill
Updated Branches:
refs/heads/master ee399317a -> 34969583b
DRILL-4996: Parquet Date auto-correction is not working in auto-partitioned parquet files generated by drill-1.6
- Changed the approach for detecting corrupted date values in parquet files generated by Drill:
the corruption status is now determined by looking at the min/max values in the metadata;
- Appropriate refactoring of TestCorruptParquetDateCorrection.
This closes #687
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/eef3b3fb
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/eef3b3fb
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/eef3b3fb
Branch: refs/heads/master
Commit: eef3b3fb6f4e76e95510253d155d0659e387fc99
Parents: ee39931
Author: Vitalii Diravka <vi...@gmail.com>
Authored: Mon Dec 12 04:41:49 2016 +0000
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:44:29 2017 -0800
----------------------------------------------------------------------
.../store/parquet/ParquetReaderUtility.java | 18 +-
.../TestCorruptParquetDateCorrection.java | 284 +++++++++----------
...t_dates_and_old_drill_parquet_writer.parquet | Bin 0 -> 4241 bytes
3 files changed, 140 insertions(+), 162 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
index b22e666..a94e220 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetReaderUtility.java
@@ -195,26 +195,26 @@ public class ParquetReaderUtility {
String createdBy = footer.getFileMetaData().getCreatedBy();
String drillVersion = footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.DRILL_VERSION_PROPERTY);
- String stringWriterVersion = footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.WRITER_VERSION_PROPERTY);
+ String writerVersionValue = footer.getFileMetaData().getKeyValueMetaData().get(ParquetRecordWriter.WRITER_VERSION_PROPERTY);
// This flag can be present in parquet files which were generated with 1.9.0-SNAPSHOT and 1.9.0 drill versions.
// If this flag is present it means that the version of the drill parquet writer is 2
final String isDateCorrectFlag = "is.date.correct";
String isDateCorrect = footer.getFileMetaData().getKeyValueMetaData().get(isDateCorrectFlag);
if (drillVersion != null) {
int writerVersion = 1;
- if (stringWriterVersion != null) {
- writerVersion = Integer.parseInt(stringWriterVersion);
+ if (writerVersionValue != null) {
+ writerVersion = Integer.parseInt(writerVersionValue);
}
else if (Boolean.valueOf(isDateCorrect)) {
writerVersion = DRILL_WRITER_VERSION_STD_DATE_FORMAT;
}
return writerVersion >= DRILL_WRITER_VERSION_STD_DATE_FORMAT ? DateCorruptionStatus.META_SHOWS_NO_CORRUPTION
- : DateCorruptionStatus.META_SHOWS_CORRUPTION;
+ // loop through parquet column metadata to find date columns, check for corrupt values
+ : checkForCorruptDateValuesInStatistics(footer, columns, autoCorrectCorruptDates);
} else {
// Possibly an old, un-migrated Drill file, check the column statistics to see if min/max values look corrupt
// only applies if there is a date column selected
if (createdBy == null || createdBy.equals("parquet-mr")) {
- // loop through parquet column metadata to find date columns, check for corrupt values
return checkForCorruptDateValuesInStatistics(footer, columns, autoCorrectCorruptDates);
} else {
// check the created by to see if it is a migrated Drill file
@@ -226,7 +226,7 @@ public class ParquetReaderUtility {
SemanticVersion semVer = parsedCreatedByVersion.getSemanticVersion();
String pre = semVer.pre + "";
if (semVer.major == 1 && semVer.minor == 8 && semVer.patch == 1 && pre.contains("drill")) {
- return DateCorruptionStatus.META_SHOWS_CORRUPTION;
+ return checkForCorruptDateValuesInStatistics(footer, columns, autoCorrectCorruptDates);
}
}
// written by a tool that wasn't Drill, the dates are not corrupted
@@ -244,9 +244,9 @@ public class ParquetReaderUtility {
* Detect corrupt date values by looking at the min/max values in the metadata.
*
* This should only be used when a file does not have enough metadata to determine if
- * the data was written with an older version of Drill, or an external tool. Drill
- * versions 1.3 and beyond should have enough metadata to confirm that the data was written
- * by Drill.
+ * the data was written with an external tool or an older version of Drill
+ * ({@link org.apache.drill.exec.store.parquet.ParquetRecordWriter#WRITER_VERSION_PROPERTY} &lt;
+ * {@link org.apache.drill.exec.store.parquet.ParquetReaderUtility#DRILL_WRITER_VERSION_STD_DATE_FORMAT}).
*
* This method only checks the first Row Group, because Drill has only ever written
* a single Row Group per file.
http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
index 0ab247d..8cd1a85 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestCorruptParquetDateCorrection.java
@@ -17,6 +17,8 @@
*/
package org.apache.drill.exec.physical.impl.writer;
+import static java.lang.String.format;
+
import org.apache.drill.PlanTestBase;
import org.apache.drill.TestBuilder;
import org.apache.drill.common.util.TestTools;
@@ -37,10 +39,11 @@ import java.util.regex.Pattern;
* Tests for compatibility reading old parquet files after date corruption
* issue was fixed in DRILL-4203.
*
- * Drill was writing non-standard dates into parquet files for all releases
- * before 1.9.0. The values have been read by Drill correctly by Drill, but
- * external tools like Spark reading the files will see corrupted values for
- * all dates that have been written by Drill.
+ * Drill could write non-standard dates into parquet files. This issue affects
+ * all Drill releases where {@link org.apache.drill.exec.store.parquet.ParquetRecordWriter#WRITER_VERSION_PROPERTY} &lt;
+ * {@link org.apache.drill.exec.store.parquet.ParquetReaderUtility#DRILL_WRITER_VERSION_STD_DATE_FORMAT}.
+ * The values have been read correctly by Drill, but external tools like Spark reading the files will see
+ * corrupted values for all dates that have been written by Drill.
*
* This change corrects the behavior of the Drill parquet writer to correctly
* store dates in the format given in the parquet specification.
@@ -59,8 +62,7 @@ import java.util.regex.Pattern;
* While the old behavior was a consistent shift into an unlikely range
* to be used in a modern database (over 10,000 years in the future), these are still
* valid date values. In the case where these may have been written into
- * files intentionally, and we cannot be certain from the metadata if Drill
- * produced the files, an option is included to turn off the auto-correction.
+ * files intentionally, an option is included to turn off the auto-correction.
* Use of this option is assumed to be extremely unlikely, but it is included
* for completeness.
*/
@@ -76,11 +78,8 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
// - one from the 0.6 version of Drill, before files had min/max statistics
// - detecting corrupt values must be deferred to actual data page reading
// - one from 1.4, where there is a proper created-by, but the corruption is present
- private static final String MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH =
+ private static final String MIXED_CORRUPTED_AND_CORRECT_DATES_PATH =
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/mixed_drill_versions";
- // partitioned with 1.4.0, date values are known to be corrupt
- private static final String CORRUPTED_PARTITIONED_DATES_1_4_0_PATH =
- "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203";
// partitioned with 1.2.0, no certain metadata that these were written with Drill
// the value will be checked to see that they look corrupt and they will be corrected
// by default. Users can use the format plugin option autoCorrectCorruptDates to disable
@@ -88,9 +87,13 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
// in the similar range as Drill's corrupt values
private static final String CORRUPTED_PARTITIONED_DATES_1_2_PATH =
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203_1_2";
+ // partitioned with 1.4.0, no certain metadata regarding the date corruption status.
+ // Corrupt date values are detected with the same approach as for the files partitioned with 1.2.0
+ private static final String CORRUPTED_PARTITIONED_DATES_1_4_0_PATH =
+ "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/partitioned_with_corruption_4203";
private static final String PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS =
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/null_date_cols_with_corruption_4203.parquet";
- private static final String CORRECTED_PARTITIONED_DATES_1_9_PATH =
+ private static final String CORRECT_PARTITIONED_DATES_1_9_PATH =
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/1_9_0_partitioned_no_corruption";
private static final String VARCHAR_PARTITIONED =
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/fewtypes_varcharpartition";
@@ -98,11 +101,13 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/fewtypes_datepartition";
private static final String EXCEPTION_WHILE_PARSING_CREATED_BY_META =
"[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/hive1dot2_fewtypes_null";
+ private static final String CORRECT_DATES_1_6_0_PATH =
+ "[WORKING_PATH]/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet";
+ private static final String PARTITIONED_1_2_FOLDER = "partitioned_with_corruption_4203_1_2";
+ private static final String MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER = "mixed_partitioned";
private static FileSystem fs;
private static Path path;
- static String PARTITIONED_1_2_FOLDER = "partitioned_with_corruption_4203_1_2";
- static String MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER = "mixed_partitioned";
@BeforeClass
public static void initFs() throws Exception {
@@ -116,9 +121,9 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH);
copyMetaDataCacheToTempReplacingInternalPaths("parquet/4203_corrupt_dates/drill.parquet.metadata_1_2.requires_replace.txt",
PARTITIONED_1_2_FOLDER);
- copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
- copyDirectoryIntoTempSpace(CORRECTED_PARTITIONED_DATES_1_9_PATH, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
- copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_4_0_PATH, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
+ copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_2_PATH, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
+ copyDirectoryIntoTempSpace(CORRECT_PARTITIONED_DATES_1_9_PATH, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
+ copyDirectoryIntoTempSpace(CORRUPTED_PARTITIONED_DATES_1_4_0_PATH, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
}
/**
@@ -128,20 +133,20 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
* in the case where we are certain correction is NOT needed. For more info see DRILL-4203.
*/
@Test
- public void testReadPartitionedOnCorrectedDates() throws Exception {
+ public void testReadPartitionedOnCorrectDates() throws Exception {
try {
for (String selection : new String[]{"*", "date_col"}) {
// for sanity, try reading all partitions without a filter
TestBuilder builder = testBuilder()
- .sqlQuery("select " + selection + " from table(dfs.`" + CORRECTED_PARTITIONED_DATES_1_9_PATH + "`" +
- "(type => 'parquet', autoCorrectCorruptDates => false))")
+ .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+ selection, CORRECT_PARTITIONED_DATES_1_9_PATH)
.unOrdered()
.baselineColumns("date_col");
- addDateBaselineVals(builder);
+ addDateBaselineValues(builder);
builder.go();
- String query = "select " + selection + " from table(dfs.`" + CORRECTED_PARTITIONED_DATES_1_9_PATH + "` " +
- "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = date '1970-01-01'";
+ String query = format("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))" +
+ " where date_col = date '1970-01-01'", selection, CORRECT_PARTITIONED_DATES_1_9_PATH);
// verify that pruning is actually taking place
testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
@@ -161,9 +166,7 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
@Test
public void testVarcharPartitionedReadWithCorruption() throws Exception {
testBuilder()
- .sqlQuery("select date_col from " +
- "dfs.`" + VARCHAR_PARTITIONED + "`" +
- "where length(varchar_col) = 12")
+ .sqlQuery("select date_col from dfs.`%s` where length(varchar_col) = 12", VARCHAR_PARTITIONED)
.baselineColumns("date_col")
.unOrdered()
.baselineValues(new DateTime(2039, 4, 9, 0, 0))
@@ -174,24 +177,21 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
@Test
public void testDatePartitionedReadWithCorruption() throws Exception {
testBuilder()
- .sqlQuery("select date_col from " +
- "dfs.`" + DATE_PARTITIONED + "`" +
- "where date_col = '1999-04-08'")
+ .sqlQuery("select date_col from dfs.`%s` where date_col = '1999-04-08'", DATE_PARTITIONED)
.baselineColumns("date_col")
.unOrdered()
.baselineValues(new DateTime(1999, 4, 8, 0, 0))
.go();
- String sql = "select date_col from dfs.`" + DATE_PARTITIONED + "` where date_col > '1999-04-08'";
- testPlanMatchingPatterns(sql, new String[]{"numFiles=6"}, null);
+ String query = format("select date_col from dfs.`%s` where date_col > '1999-04-08'", DATE_PARTITIONED);
+ testPlanMatchingPatterns(query, new String[]{"numFiles=6"}, null);
}
@Test
public void testCorrectDatesAndExceptionWhileParsingCreatedBy() throws Exception {
testBuilder()
- .sqlQuery("select date_col from " +
- "dfs.`" + EXCEPTION_WHILE_PARSING_CREATED_BY_META +
- "` where to_date(date_col, 'yyyy-mm-dd') < '1997-01-02'")
+ .sqlQuery("select date_col from dfs.`%s` where to_date(date_col, 'yyyy-mm-dd') < '1997-01-02'",
+ EXCEPTION_WHILE_PARSING_CREATED_BY_META)
.baselineColumns("date_col")
.unOrdered()
.baselineValues(new DateTime(1996, 1, 29, 0, 0))
@@ -201,68 +201,34 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
.go();
}
- /**
- * Test reading a directory full of partitioned parquet files with dates, these files have a drill version
- * number of 1.4.0 in their footers, so we can be certain they are corrupt. The option to disable the
- * correction is passed, but it will not change the result in the case where we are certain correction
- * is needed. For more info see DRILL-4203.
- */
- @Test
- public void testReadPartitionedOnCorruptedDates() throws Exception {
- try {
- for (String selection : new String[]{"*", "date_col"}) {
- // for sanity, try reading all partitions without a filter
- TestBuilder builder = testBuilder()
- .sqlQuery("select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_4_0_PATH + "`" +
- "(type => 'parquet', autoCorrectCorruptDates => false))")
- .unOrdered()
- .baselineColumns("date_col");
- addDateBaselineVals(builder);
- builder.go();
-
- String query = "select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_4_0_PATH + "` " +
- "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = date '1970-01-01'";
- // verify that pruning is actually taking place
- testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
- // read with a filter on the partition column
- testBuilder()
- .sqlQuery(query)
- .unOrdered()
- .baselineColumns("date_col")
- .baselineValues(new DateTime(1970, 1, 1, 0, 0))
- .go();
- }
- } finally {
- test("alter session reset all");
- }
- }
@Test
public void testReadPartitionedOnCorruptedDates_UserDisabledCorrection() throws Exception {
try {
for (String selection : new String[]{"*", "date_col"}) {
- // for sanity, try reading all partitions without a filter
- TestBuilder builder = testBuilder()
- .sqlQuery("select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`" +
- "(type => 'parquet', autoCorrectCorruptDates => false))")
- .unOrdered()
- .baselineColumns("date_col");
- addCorruptedDateBaselineVals(builder);
- builder.go();
-
- String query = "select " + selection + " from table(dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "` " +
- "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = cast('15334-03-17' as date)";
- // verify that pruning is actually taking place
- testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
- // read with a filter on the partition column
- testBuilder()
- .sqlQuery(query)
- .unOrdered()
- .baselineColumns("date_col")
- .baselineValues(new DateTime(15334, 03, 17, 0, 0))
- .go();
+ for (String table : new String[]{CORRUPTED_PARTITIONED_DATES_1_2_PATH, CORRUPTED_PARTITIONED_DATES_1_4_0_PATH}) {
+ // for sanity, try reading all partitions without a filter
+ TestBuilder builder = testBuilder()
+ .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+ selection, table)
+ .unOrdered()
+ .baselineColumns("date_col");
+ addCorruptedDateBaselineValues(builder);
+ builder.go();
+
+ String query = format("select %s from table(dfs.`%s` (type => 'parquet', " +
+ "autoCorrectCorruptDates => false)) where date_col = cast('15334-03-17' as date)", selection, table);
+ // verify that pruning is actually taking place
+ testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
+
+ // read with a filter on the partition column
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("date_col")
+ .baselineValues(new DateTime(15334, 3, 17, 0, 0))
+ .go();
+ }
}
} finally {
test("alter session reset all");
@@ -270,29 +236,31 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
}
@Test
- public void testCorruptValDetectionDuringPruning() throws Exception {
+ public void testCorruptValueDetectionDuringPruning() throws Exception {
try {
for (String selection : new String[]{"*", "date_col"}) {
- // for sanity, try reading all partitions without a filter
- TestBuilder builder = testBuilder()
- .sqlQuery("select " + selection + " from dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`")
- .unOrdered()
- .baselineColumns("date_col");
- addDateBaselineVals(builder);
- builder.go();
-
- String query = "select " + selection + " from dfs.`" + CORRUPTED_PARTITIONED_DATES_1_2_PATH + "`" +
- " where date_col = date '1970-01-01'";
- // verify that pruning is actually taking place
- testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
-
- // read with a filter on the partition column
- testBuilder()
- .sqlQuery(query)
- .unOrdered()
- .baselineColumns("date_col")
- .baselineValues(new DateTime(1970, 1, 1, 0, 0))
- .go();
+ for (String table : new String[]{CORRUPTED_PARTITIONED_DATES_1_2_PATH, CORRUPTED_PARTITIONED_DATES_1_4_0_PATH}) {
+ // for sanity, try reading all partitions without a filter
+ TestBuilder builder = testBuilder()
+ .sqlQuery("select %s from dfs.`%s`", selection, table)
+ .unOrdered()
+ .baselineColumns("date_col");
+ addDateBaselineValues(builder);
+ builder.go();
+
+ String query = format("select %s from dfs.`%s`" +
+ " where date_col = date '1970-01-01'", selection, table);
+ // verify that pruning is actually taking place
+ testPlanMatchingPatterns(query, new String[]{"numFiles=1"}, null);
+
+ // read with a filter on the partition column
+ testBuilder()
+ .sqlQuery(query)
+ .unOrdered()
+ .baselineColumns("date_col")
+ .baselineValues(new DateTime(1970, 1, 1, 0, 0))
+ .go();
+ }
}
} finally {
test("alter session reset all");
@@ -313,8 +281,8 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
@Test
public void testReadCorruptDatesWithNullFilledColumns() throws Exception {
testBuilder()
- .sqlQuery("select null_dates_1, null_dates_2, non_existent_field, date_col from dfs.`" +
- PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS + "`")
+ .sqlQuery("select null_dates_1, null_dates_2, non_existent_field, date_col from dfs.`%s`",
+ PARQUET_DATE_FILE_WITH_NULL_FILLED_COLS)
.unOrdered()
.baselineColumns("null_dates_1", "null_dates_2", "non_existent_field", "date_col")
.baselineValues(null, null, null, new DateTime(1970, 1, 1, 0, 0))
@@ -332,7 +300,7 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
readFilesWithUserDisabledAutoCorrection();
try {
- test(String.format("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER));
+ test("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER);
// read all of the types with the complex reader
readFilesWithUserDisabledAutoCorrection();
} finally {
@@ -352,34 +320,34 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
@Test
public void testReadMixedOldAndNewBothReaders() throws Exception {
/// read once with the flat reader
- readMixedCorruptedAndCorrectedDates();
+ readMixedCorruptedAndCorrectDates();
try {
// read all of the types with the complex reader
- test(String.format("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER));
- readMixedCorruptedAndCorrectedDates();
+ test("alter session set %s = true", ExecConstants.PARQUET_NEW_RECORD_READER);
+ readMixedCorruptedAndCorrectDates();
} finally {
- test(String.format("alter session set %s = false", ExecConstants.PARQUET_NEW_RECORD_READER));
+ test("alter session set %s = false", ExecConstants.PARQUET_NEW_RECORD_READER);
}
}
@Test
public void testReadOldMetadataCacheFile() throws Exception {
// for sanity, try reading all partitions without a filter
- String query = "select date_col from dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "`";
+ String query = format("select date_col from dfs.`%s`", new Path(path, PARTITIONED_1_2_FOLDER));
TestBuilder builder = testBuilder()
.sqlQuery(query)
.unOrdered()
.baselineColumns("date_col");
- addDateBaselineVals(builder);
+ addDateBaselineValues(builder);
builder.go();
testPlanMatchingPatterns(query, new String[]{"usedMetadataFile=true"}, null);
}
@Test
public void testReadOldMetadataCacheFileWithPruning() throws Exception {
- String query = "select date_col from dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "`" +
- " where date_col = date '1970-01-01'";
+ String query = format("select date_col from dfs.`%s` where date_col = date '1970-01-01'",
+ new Path(path, PARTITIONED_1_2_FOLDER));
// verify that pruning is actually taking place
testPlanMatchingPatterns(query, new String[]{"numFiles=1", "usedMetadataFile=true"}, null);
@@ -396,15 +364,16 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
public void testReadOldMetadataCacheFileOverrideCorrection() throws Exception {
// for sanity, try reading all partitions without a filter
TestBuilder builder = testBuilder()
- .sqlQuery("select date_col from table(dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "`" +
- "(type => 'parquet', autoCorrectCorruptDates => false))")
+ .sqlQuery("select date_col from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+ new Path(path, PARTITIONED_1_2_FOLDER))
.unOrdered()
.baselineColumns("date_col");
- addCorruptedDateBaselineVals(builder);
+ addCorruptedDateBaselineValues(builder);
builder.go();
- String query = "select date_col from table(dfs.`" + new Path(path, PARTITIONED_1_2_FOLDER) + "` " +
- "(type => 'parquet', autoCorrectCorruptDates => false))" + " where date_col = cast('15334-03-17' as date)";
+ String query = format("select date_col from table(dfs.`%s` (type => 'parquet', " +
+ "autoCorrectCorruptDates => false)) where date_col = cast('15334-03-17' as date)",
+ new Path(path, PARTITIONED_1_2_FOLDER));
// verify that pruning is actually taking place
testPlanMatchingPatterns(query, new String[]{"numFiles=1", "usedMetadataFile=true"}, null);
@@ -413,27 +382,26 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
.sqlQuery(query)
.unOrdered()
.baselineColumns("date_col")
- .baselineValues(new DateTime(15334, 03, 17, 0, 0))
+ .baselineValues(new DateTime(15334, 3, 17, 0, 0))
.go();
}
@Test
public void testReadNewMetadataCacheFileOverOldAndNewFiles() throws Exception {
- String table = "dfs.`" + new Path(path, MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER) + "`";
+ String table = format("dfs.`%s`", new Path(path, MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER));
copyMetaDataCacheToTempReplacingInternalPaths("parquet/4203_corrupt_dates/" +
- "mixed_version_partitioned_metadata.requires_replace.txt", MIXED_CORRUPTED_AND_CORRECTED_PARTITIONED_FOLDER);
+ "mixed_version_partitioned_metadata.requires_replace.txt", MIXED_CORRUPTED_AND_CORRECT_PARTITIONED_FOLDER);
// for sanity, try reading all partitions without a filter
TestBuilder builder = testBuilder()
.sqlQuery("select date_col from " + table)
.unOrdered()
.baselineColumns("date_col");
- addDateBaselineVals(builder);
- addDateBaselineVals(builder);
- addDateBaselineVals(builder);
+ addDateBaselineValues(builder);
+ addDateBaselineValues(builder);
+ addDateBaselineValues(builder);
builder.go();
- String query = "select date_col from " + table +
- " where date_col = date '1970-01-01'";
+ String query = format("select date_col from %s where date_col = date '1970-01-01'", table);
// verify that pruning is actually taking place
testPlanMatchingPatterns(query, new String[]{"numFiles=3", "usedMetadataFile=true"}, null);
@@ -448,28 +416,38 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
.go();
}
+ @Test
+ public void testCorrectDateValuesGeneratedByOldVersionOfDrill() throws Exception {
+ testBuilder()
+ .sqlQuery("select i_rec_end_date from dfs.`%s` limit 1", CORRECT_DATES_1_6_0_PATH)
+ .baselineColumns("i_rec_end_date")
+ .unOrdered()
+ .baselineValues(new DateTime(2000, 10, 26, 0, 0))
+ .go();
+ }
+
/**
* Read a directory with parquet files where some have corrupted dates, see DRILL-4203.
* @throws Exception
*/
- private void readMixedCorruptedAndCorrectedDates() throws Exception {
+ private void readMixedCorruptedAndCorrectDates() throws Exception {
// ensure that selecting the date column explicitly or as part of a star still results
// in checking the file metadata for date columns (when we need to check the statistics
// for bad values) to set the flag that the values are corrupt
for (String selection : new String[] {"*", "date_col"}) {
TestBuilder builder = testBuilder()
- .sqlQuery("select " + selection + " from dfs.`" + MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH + "`")
+ .sqlQuery("select %s from dfs.`%s`", selection, MIXED_CORRUPTED_AND_CORRECT_DATES_PATH)
.unOrdered()
.baselineColumns("date_col");
for (int i = 0; i < 4; i++) {
- addDateBaselineVals(builder);
+ addDateBaselineValues(builder);
}
builder.go();
}
}
- private void addDateBaselineVals(TestBuilder builder) {
+ private void addDateBaselineValues(TestBuilder builder) {
builder
.baselineValues(new DateTime(1970, 1, 1, 0, 0))
.baselineValues(new DateTime(1970, 1, 2, 0, 0))
@@ -480,16 +458,16 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
}
/**
- * These are the same values added in the addDateBaselineVals, shifted as corrupt values
+ * These are the same values added in the addDateBaselineValues, shifted as corrupt values
*/
- private void addCorruptedDateBaselineVals(TestBuilder builder) {
+ private void addCorruptedDateBaselineValues(TestBuilder builder) {
builder
- .baselineValues(new DateTime(15334, 03, 17, 0, 0))
- .baselineValues(new DateTime(15334, 03, 18, 0, 0))
- .baselineValues(new DateTime(15334, 03, 15, 0, 0))
- .baselineValues(new DateTime(15334, 03, 16, 0, 0))
- .baselineValues(new DateTime(15264, 03, 16, 0, 0))
- .baselineValues(new DateTime(15379, 03, 17, 0, 0));
+ .baselineValues(new DateTime(15334, 3, 17, 0, 0))
+ .baselineValues(new DateTime(15334, 3, 18, 0, 0))
+ .baselineValues(new DateTime(15334, 3, 15, 0, 0))
+ .baselineValues(new DateTime(15334, 3, 16, 0, 0))
+ .baselineValues(new DateTime(15264, 3, 16, 0, 0))
+ .baselineValues(new DateTime(15379, 3, 17, 0, 0));
}
private void readFilesWithUserDisabledAutoCorrection() throws Exception {
@@ -498,14 +476,14 @@ public class TestCorruptParquetDateCorrection extends PlanTestBase {
// for bad values) to set the flag that the values are corrupt
for (String selection : new String[] {"*", "date_col"}) {
TestBuilder builder = testBuilder()
- .sqlQuery("select " + selection + " from table(dfs.`" + MIXED_CORRUPTED_AND_CORRECTED_DATES_PATH + "`" +
- "(type => 'parquet', autoCorrectCorruptDates => false))")
+ .sqlQuery("select %s from table(dfs.`%s` (type => 'parquet', autoCorrectCorruptDates => false))",
+ selection, MIXED_CORRUPTED_AND_CORRECT_DATES_PATH)
.unOrdered()
.baselineColumns("date_col");
- addDateBaselineVals(builder);
- addDateBaselineVals(builder);
- addCorruptedDateBaselineVals(builder);
- addCorruptedDateBaselineVals(builder);
+ addDateBaselineValues(builder);
+ addCorruptedDateBaselineValues(builder);
+ addCorruptedDateBaselineValues(builder);
+ addCorruptedDateBaselineValues(builder);
builder.go();
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/eef3b3fb/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet b/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet
new file mode 100644
index 0000000..6d81db0
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/4203_corrupt_dates/correct_dates_and_old_drill_parquet_writer.parquet differ
[3/4] drill git commit: DRILL-5152: Enhance the mock data source:
better data, SQL access
Posted by pa...@apache.org.
DRILL-5152: Enhance the mock data source: better data, SQL access
Provides an enhanced version of the mock data source. See the JIRA
entry for motivation, package-info.java for details of operation.
Revisions suggested by code review
Also includes additional comments and a few more compiler warning
cleanups.
This closes #708
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/535623bb
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/535623bb
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/535623bb
Branch: refs/heads/master
Commit: 535623bb8fdf81220164134d366817f57e560710
Parents: 77e5010
Author: Paul Rogers <pr...@maprtech.com>
Authored: Wed Dec 21 21:47:20 2016 -0800
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:45:46 2017 -0800
----------------------------------------------------------------------
.../drill/exec/record/AbstractRecordBatch.java | 2 +-
.../drill/exec/store/AbstractRecordReader.java | 2 +-
.../apache/drill/exec/store/StoragePlugin.java | 5 +-
.../drill/exec/store/StoragePluginRegistry.java | 4 +-
.../exec/store/StoragePluginRegistryImpl.java | 29 ++-
.../apache/drill/exec/store/mock/ColumnDef.java | 178 ++++++++++++++++
.../apache/drill/exec/store/mock/DateGen.java | 69 +++++++
.../apache/drill/exec/store/mock/DoubleGen.java | 48 +++++
.../store/mock/ExtendedMockRecordReader.java | 156 ++++++++++++++
.../apache/drill/exec/store/mock/FieldGen.java | 37 ++++
.../apache/drill/exec/store/mock/IntGen.java | 47 +++++
.../drill/exec/store/mock/MockGroupScanPOP.java | 205 ++++++++++++++++---
.../drill/exec/store/mock/MockRecordReader.java | 8 +-
.../exec/store/mock/MockScanBatchCreator.java | 8 +-
.../exec/store/mock/MockStorageEngine.java | 79 ++++++-
.../store/mock/MockStorageEngineConfig.java | 9 +-
.../drill/exec/store/mock/MockStorePOP.java | 3 +-
.../drill/exec/store/mock/MockSubScanPOP.java | 44 +++-
.../apache/drill/exec/store/mock/MoneyGen.java | 48 +++++
.../apache/drill/exec/store/mock/StringGen.java | 56 +++++
.../drill/exec/store/mock/package-info.java | 130 ++++++++++++
.../apache/drill/exec/TestOpSerialization.java | 4 +-
.../fn/interp/ExpressionInterpreterTest.java | 18 +-
23 files changed, 1108 insertions(+), 81 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
index 998665c..d82c154 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/AbstractRecordBatch.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
index 16118d9..2152025 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/AbstractRecordReader.java
@@ -67,7 +67,7 @@ public abstract class AbstractRecordReader implements RecordReader {
Collection<SchemaPath> columnsToRead = projected;
// If no column is required (SkipQuery), by default it will use DEFAULT_COLS_TO_READ .
- // Handling SkipQuery is storage-plugin specif : JSON, text reader, parquet will override, in order to
+ // Handling SkipQuery is storage-plugin specific : JSON, text reader, parquet will override, in order to
// improve query performance.
if (projected.isEmpty()) {
columnsToRead = getDefaultColumnsToRead();
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
index 112bc15..2969d4f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePlugin.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -71,7 +71,7 @@ public interface StoragePlugin extends SchemaFactory, AutoCloseable {
public AbstractGroupScan getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns)
throws IOException;
- /** Method returns a jackson serializable object that extends a StoragePluginConfig
+ /** Method returns a Jackson serializable object that extends a StoragePluginConfig
* @return an extension of StoragePluginConfig
*/
public StoragePluginConfig getConfig();
@@ -80,5 +80,4 @@ public interface StoragePlugin extends SchemaFactory, AutoCloseable {
* Initialize the storage plugin. The storage plugin will not be used until this method is called.
*/
public void start() throws IOException;
-
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
index 7018ce8..82f18f8 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistry.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -51,7 +51,7 @@ public interface StoragePluginRegistry extends Iterable<Map.Entry<String, Storag
* @param name
* The name of the plugin
* @param config
- * The plugin confgiruation
+ * The plugin configuration
* @param persist
* Whether to persist the plugin for later use or treat it as ephemeral.
* @return The StoragePlugin instance.
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
index bf4affd..3fb1c3a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/StoragePluginRegistryImpl.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -125,9 +125,10 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
availablePlugins = findAvailablePlugins(classpathScan);
// create registered plugins defined in "storage-plugins.json"
- this.plugins.putAll(createPlugins());
+ plugins.putAll(createPlugins());
}
+ @SuppressWarnings("resource")
private Map<String, StoragePlugin> createPlugins() throws DrillbitStartupException {
try {
/*
@@ -145,7 +146,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
String pluginsData = Resources.toString(url, Charsets.UTF_8);
StoragePlugins plugins = lpPersistence.getMapper().readValue(pluginsData, StoragePlugins.class);
for (Map.Entry<String, StoragePluginConfig> config : plugins) {
- if (!pluginSystemTable.putIfAbsent(config.getKey(), config.getValue())) {
+ if (!definePluginConfig(config.getKey(), config.getValue())) {
logger.warn("Duplicate plugin instance '{}' defined in [{}, {}], ignoring the later one.",
config.getKey(), pluginURLMap.get(config.getKey()), url);
continue;
@@ -185,6 +186,24 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
}
}
+ /**
+ * Add a plugin and configuration. Assumes neither exists. Primarily
+ * for testing.
+ * <p>
+ * The plugin is registered first through {@link #addPlugin}, then its
+ * configuration is stored through {@code definePluginConfig}. The boolean
+ * result of storing the configuration is not checked here, so this method
+ * does not report whether a configuration with the same name was already
+ * present.
+ *
+ * @param name plugin name
+ * @param config plugin config
+ * @param plugin plugin implementation
+ */
+
+ public void definePlugin(String name, StoragePluginConfig config, StoragePlugin plugin) {
+ addPlugin(name, plugin);
+ // Return value intentionally ignored: callers are expected to use a fresh name.
+ definePluginConfig(name, config);
+ }
+
+ /**
+ * Store a plugin configuration under the given name by delegating to
+ * {@code pluginSystemTable.putIfAbsent}.
+ *
+ * @param name plugin name
+ * @param config plugin config
+ * @return the result of {@code putIfAbsent}; {@code createPlugins} treats
+ * {@code false} as "a plugin with this name is already defined"
+ */
+ private boolean definePluginConfig(String name, StoragePluginConfig config) {
+ return pluginSystemTable.putIfAbsent(name, config);
+ }
+
@Override
public void addPlugin(String name, StoragePlugin plugin) {
plugins.put(name, plugin);
@@ -192,6 +211,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
@Override
public void deletePlugin(String name) {
+ @SuppressWarnings("resource")
StoragePlugin plugin = plugins.remove(name);
closePlugin(plugin);
pluginSystemTable.delete(name);
@@ -209,6 +229,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
}
}
+ @SuppressWarnings("resource")
@Override
public StoragePlugin createOrUpdate(String name, StoragePluginConfig config, boolean persist)
throws ExecutionSetupException {
@@ -299,6 +320,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
}
}
+ @SuppressWarnings("resource")
@Override
public FormatPlugin getFormatPlugin(StoragePluginConfig storageConfig, FormatPluginConfig formatConfig)
throws ExecutionSetupException {
@@ -346,6 +368,7 @@ public class StoragePluginRegistryImpl implements StoragePluginRegistry {
public class DrillSchemaFactory implements SchemaFactory {
+ @SuppressWarnings("resource")
@Override
public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
Stopwatch watch = Stopwatch.createStarted();
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
new file mode 100644
index 0000000..cfaacdd
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn;
+
+/**
+ * Defines a column for the "enhanced" version of the mock data
+ * source. This class is built from the column definitions in either
+ * the physical plan or an SQL statement (which gives rise to a
+ * physical plan.)
+ * <p>
+ * The fields are public and are read directly by the extended mock
+ * record reader in this package.
+ */
+public class ColumnDef {
+  public MockColumn mockCol;
+  public String name;
+  public int width;
+  public FieldGen generator;
+
+  /**
+   * Build a column definition and its value generator from the mock
+   * column description.
+   *
+   * @param mockCol column description taken from the scan definition
+   * @throws IllegalArgumentException if no generator can be created
+   *           for the column
+   */
+  public ColumnDef(MockColumn mockCol) {
+    this.mockCol = mockCol;
+    name = mockCol.getName();
+    width = TypeHelper.getSize(mockCol.getMajorType());
+    makeGenerator();
+  }
+
+  /**
+   * Build one copy of a repeated column. The repetition index is
+   * appended to the column name (foo becomes foo1, foo2, ...). Note that
+   * the generator is set up before the suffix is appended.
+   *
+   * @param mockCol column description taken from the scan definition
+   * @param rep index of this copy, appended to the name
+   */
+  public ColumnDef(MockColumn mockCol, int rep) {
+    this(mockCol);
+    name += Integer.toString(rep);
+  }
+
+  /**
+   * Create the data generator class for this column. The generator is
+   * created to match the data type by default. Or, the plan can
+   * specify a generator class (in which case the plan must ensure that
+   * the generator produces the correct value for the column data type.)
+   * The generator names a class: either a fully qualified name, or a
+   * class in this package.
+   */
+  private void makeGenerator() {
+    String genName = mockCol.getGenerator();
+    if (genName == null) {
+      makeDefaultGenerator();
+      return;
+    }
+
+    // A name without a package refers to a generator in this package.
+    if (! genName.contains(".")) {
+      genName = "org.apache.drill.exec.store.mock." + genName;
+    }
+    try {
+      ClassLoader cl = getClass().getClassLoader();
+      Class<?> genClass = cl.loadClass(genName);
+      generator = (FieldGen) genClass.newInstance();
+    } catch (ClassNotFoundException | InstantiationException
+        | IllegalAccessException | ClassCastException e) {
+      // Keep the original failure as the cause so the real reason
+      // (missing class, no default constructor, wrong type) is visible.
+      throw new IllegalArgumentException("Generator " + genName + " is undefined for mock field " + name, e);
+    }
+    generator.setup(this);
+  }
+
+  /**
+   * Pick the built-in generator that matches the column's minor type.
+   * Only FLOAT8, INT and VARCHAR have default generators; every other
+   * type is rejected.
+   *
+   * @throws IllegalArgumentException if the type has no default generator
+   */
+  private void makeDefaultGenerator() {
+    MinorType minorType = mockCol.getMinorType();
+    switch (minorType) {
+    case FLOAT8:
+      generator = new DoubleGen();
+      break;
+    case INT:
+      generator = new IntGen();
+      break;
+    case VARCHAR:
+      generator = new StringGen();
+      break;
+    default:
+      // No default generator exists for the remaining minor types.
+      break;
+    }
+    if (generator == null) {
+      throw new IllegalArgumentException("No default column generator for column " + name + " of type " + minorType);
+    }
+    generator.setup(this);
+  }
+
+  public MockColumn getConfig() {
+    return mockCol;
+  }
+
+  public String getName() {
+    return name;
+  }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java
new file mode 100644
index 0000000..f7d53ed
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DateGen.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Random;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+
+/**
+ * Very simple date value generator that produces ISO dates
+ * uniformly distributed over the last year. ISO format
+ * is: 2016-12-07.
+ * <p>
+ * There are many possible date formats; this class does not
+ * attempt to generate all of them. Drill provides a date
+ * type, but we use a string format because example cases from
+ * people using the product often read text files. Finally, we
+ * (reluctantly) use the old-style date formats instead of the
+ * new Java 8 classes because Drill prefers to build with Java 7.
+ * <p>
+ * Each instance owns its own {@link SimpleDateFormat}, which is not
+ * thread-safe; do not share one generator between threads.
+ */
+public class DateGen implements FieldGen {
+
+  private static final int DAYS_PER_YEAR = 365;
+
+  // Millisecond spans are held as long: one year of milliseconds
+  // (31,536,000,000) does not fit in an int and would silently wrap.
+  private static final long ONE_DAY = 24L * 60 * 60 * 1000;
+  private static final long ONE_YEAR = ONE_DAY * DAYS_PER_YEAR;
+
+  private final Random rand = new Random();
+  private final long baseTime;
+  private final SimpleDateFormat fmt;
+
+  public DateGen() {
+    // Start a year ago.
+    baseTime = System.currentTimeMillis() - ONE_YEAR;
+    // MM = month in year, dd = day in month. (Lower-case mm is minutes
+    // and upper-case DD is day in year, neither of which is wanted here.)
+    fmt = new SimpleDateFormat("yyyy-MM-dd");
+  }
+
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  /**
+   * @return a timestamp, in milliseconds since the epoch, a whole number
+   *         of days (0 to 364) after the base time of one year ago
+   */
+  private long value() {
+    return baseTime + rand.nextInt(DAYS_PER_YEAR) * ONE_DAY;
+  }
+
+  /**
+   * Write a randomly chosen date, formatted as yyyy-MM-dd, into the
+   * given position of a VarChar vector.
+   *
+   * @param v the vector to write to; must be a {@link VarCharVector}
+   * @param index position within the vector
+   */
+  @Override
+  public void setValue(ValueVector v, int index) {
+    VarCharVector vector = (VarCharVector) v;
+    // value() already includes the base time; do not add it again.
+    String str = fmt.format(new Date(value()));
+    vector.getMutator().setSafe(index, str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+  }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java
new file mode 100644
index 0000000..e28a394
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/DoubleGen.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.Float8Vector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Field generator for FLOAT8 columns. Each value is drawn uniformly
+ * from the range of minus one million (inclusive) to plus one million
+ * (exclusive), with no rounding applied to the fractional part.
+ */
+public class DoubleGen implements FieldGen {
+
+  private final Random rand = new Random();
+
+  /** No per-column configuration is needed for this generator. */
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  /**
+   * Store a freshly generated random double at the given position.
+   *
+   * @param v the vector to write to; must be a {@link Float8Vector}
+   * @param index position within the vector
+   */
+  @Override
+  public void setValue(ValueVector v, int index) {
+    final Float8Vector target = (Float8Vector) v;
+    target.getMutator().set(index, nextValue());
+  }
+
+  /** Scale a unit-interval random number onto the +-1 million range. */
+  private double nextValue() {
+    return rand.nextDouble() * 2_000_000 - 1_000_000;
+  }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java
new file mode 100644
index 0000000..f3804d4
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ExtendedMockRecordReader.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.exception.SchemaChangeException;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn;
+import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockScanEntry;
+import org.apache.drill.exec.vector.AllocationHelper;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Extended form of the mock record reader that uses generator class
+ * instances to create the mock values. This is a work in progress.
+ * Generators exist for a few simple required types. One also exists
+ * to generate strings that contain dates.
+ * <p>
+ * The definition is provided inside the sub scan used to create the
+ * {@code ScanBatch} used to create this record reader.
+ */
+public class ExtendedMockRecordReader extends AbstractRecordReader {
+
+  /** Approximate number of bytes of data to place in one batch. */
+  private static final int TARGET_BATCH_BYTES = 250000;
+
+  private ValueVector[] valueVectors;
+  private int batchRecordCount;
+  private int recordsRead;
+
+  private final MockScanEntry config;
+  private final FragmentContext context;
+  private final ColumnDef fields[];
+
+  public ExtendedMockRecordReader(FragmentContext context, MockScanEntry config) {
+    this.context = context;
+    this.config = config;
+
+    fields = buildColumnDefs();
+  }
+
+  /**
+   * Expand the configured columns into one {@link ColumnDef} per output
+   * column. A column with a repeat count above 1 yields that many copies,
+   * named with a 1-based suffix; any other repeat count yields the column
+   * once, unsuffixed.
+   *
+   * @return the output column definitions, in declaration order
+   * @throws IllegalArgumentException if two configured columns share a name
+   */
+  private ColumnDef[] buildColumnDefs() {
+    List<ColumnDef> defs = new ArrayList<>();
+
+    // Look for duplicate names. Bad things happen when the same name
+    // appears twice. We must do this here because some tests create
+    // a physical plan directly, meaning that this is the first
+    // opportunity to review the column definitions.
+
+    Set<String> names = new HashSet<>();
+    for (MockColumn col : config.getTypes()) {
+      if (! names.add(col.name)) {
+        throw new IllegalArgumentException("Duplicate column name: " + col.name);
+      }
+      // Treat a repeat count below 1 as 1. (Math.min here would cap every
+      // count at 1 and silently drop columns whose count is 0 or less.)
+      int repeat = Math.max(1, col.getRepeatCount());
+      if (repeat == 1) {
+        defs.add(new ColumnDef(col));
+      } else {
+        for (int j = 0; j < repeat; j++) {
+          defs.add(new ColumnDef(col, j+1));
+        }
+      }
+    }
+    return defs.toArray(new ColumnDef[defs.size()]);
+  }
+
+  /**
+   * Sum the type sizes of all output columns (after repeat expansion).
+   *
+   * @param types unused; sizes are taken from the expanded field list
+   * @return estimated row width as reported by {@code TypeHelper.getSize}
+   */
+  private int getEstimatedRecordSize(MockColumn[] types) {
+    int size = 0;
+    for (ColumnDef field : fields) {
+      size += TypeHelper.getSize(field.getConfig().getMajorType());
+    }
+    return size;
+  }
+
+  @Override
+  public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
+    try {
+      // Guard against a zero-width estimate (for example, no columns),
+      // which would otherwise cause a divide-by-zero below.
+      final int estimateRowSize = Math.max(1, getEstimatedRecordSize(config.getTypes()));
+      // One vector per output column: repeated columns expand the field
+      // list beyond the number of configured types.
+      valueVectors = new ValueVector[fields.length];
+      // Always produce at least one row per batch, even for very wide rows.
+      batchRecordCount = Math.max(1, TARGET_BATCH_BYTES / estimateRowSize);
+
+      for (int i = 0; i < fields.length; i++) {
+        final ColumnDef col = fields[i];
+        final MajorType type = col.getConfig().getMajorType();
+        final MaterializedField field = MaterializedField.create(col.getName(), type);
+        final Class<? extends ValueVector> vvClass = TypeHelper.getValueVectorClass(field.getType().getMinorType(), field.getDataMode());
+        valueVectors[i] = output.addField(field, vvClass);
+      }
+    } catch (SchemaChangeException e) {
+      throw new ExecutionSetupException("Failure while setting up fields", e);
+    }
+  }
+
+  /**
+   * Fill the next batch with generated values.
+   *
+   * @return the number of rows written, or 0 once the configured record
+   *         count has been reached
+   */
+  @Override
+  public int next() {
+    if (recordsRead >= this.config.getRecords()) {
+      return 0;
+    }
+
+    final int recordSetSize = Math.min(batchRecordCount, this.config.getRecords() - recordsRead);
+    recordsRead += recordSetSize;
+    for (int row = 0; row < recordSetSize; row++) {
+      for (int col = 0; col < fields.length; col++) {
+        fields[col].generator.setValue(valueVectors[col], row);
+      }
+    }
+
+    return recordSetSize;
+  }
+
+  @Override
+  public void allocate(Map<String, ValueVector> vectorMap) throws OutOfMemoryException {
+    try {
+      for (final ValueVector v : vectorMap.values()) {
+        AllocationHelper.allocate(v, Character.MAX_VALUE, 50, 10);
+      }
+    } catch (NullPointerException e) {
+      // NOTE(review): a NullPointerException is reported as out-of-memory
+      // here; presumably the allocator signals failure that way - confirm.
+      throw new OutOfMemoryException();
+    }
+  }
+
+  @Override
+  public void close() { }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java
new file mode 100644
index 0000000..b51077f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/FieldGen.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Interface which all mock column data generators must
+ * implement. Each has a {@link #setup} method which is given
+ * the column definition for the column. This definition may have
+ * additional configuration information for the column (column width,
+ * precision, etc.). Each also has a {@link #setValue} method that
+ * does the work of setting a specific value vector position to the
+ * generated value. The implementation is expected to cast the
+ * vector to the type supported by that particular generator.
+ * (This is test code; we're not overly concerned with the overhead
+ * of such casts.)
+ */
+public interface FieldGen {
+ /**
+ * Called by {@link ColumnDef} right after the generator is created.
+ *
+ * @param colDef definition of the column this generator will fill
+ */
+ void setup(ColumnDef colDef);
+
+ /**
+ * Write one generated value into the vector.
+ *
+ * @param v the vector to write to; implementations cast it to the
+ * concrete vector type they support
+ * @param index position within the vector at which to store the value
+ */
+ void setValue(ValueVector v, int index);
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java
new file mode 100644
index 0000000..be00541
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/IntGen.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.IntVector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Field generator for INT columns. Values are drawn uniformly at
+ * random from the full 32-bit range, {@link Integer#MIN_VALUE}
+ * through {@link Integer#MAX_VALUE}.
+ */
+public class IntGen implements FieldGen {
+
+  private final Random rand = new Random();
+
+  /** No per-column configuration is needed for this generator. */
+  @Override
+  public void setup(ColumnDef colDef) { }
+
+  /**
+   * Store a freshly generated random integer at the given position.
+   *
+   * @param v the vector to write to; must be an {@link IntVector}
+   * @param index position within the vector
+   */
+  @Override
+  public void setValue(ValueVector v, int index) {
+    final IntVector target = (IntVector) v;
+    target.getMutator().set(index, rand.nextInt());
+  }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
index bb71c31..2e8af42 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,17 +17,17 @@
*/
package org.apache.drill.exec.store.mock;
+import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.expr.TypeHelper;
-import org.apache.drill.exec.physical.EndpointAffinity;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.physical.base.PhysicalOperator;
@@ -43,21 +43,50 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Preconditions;
+/**
+ * Describes a "group" scan of a (logical) mock table. The mock table has a
+ * schema described by the {@link MockScanEntry} class. To simulate a scan that
+ * can be parallelized, this group scan can contain a list of
+ * {@link MockScanEntry}, each of which simulates a separate file on disk, or
+ * block within a file. Each will give rise to a separate minor fragment
+ * (assuming sufficient parallelization.)
+ */
+
@JsonTypeName("mock-scan")
public class MockGroupScanPOP extends AbstractGroupScan {
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockGroupScanPOP.class);
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory
+ .getLogger(MockGroupScanPOP.class);
+ /**
+ * URL for the scan. Unused. Appears to be a vestige of an earlier design that
+ * required it.
+ */
private final String url;
+
+ /**
+ * The set of simulated files to scan.
+ */
protected final List<MockScanEntry> readEntries;
- private LinkedList<MockScanEntry>[] mappings;
+ private LinkedList<MockScanEntry>[] mappings;
+
+ /**
+ * Whether this group scan uses a newer "extended" schema definition, or the
+ * original (non-extended) definition.
+ */
+
+ private boolean extended;
@JsonCreator
- public MockGroupScanPOP(@JsonProperty("url") String url, @JsonProperty("entries") List<MockScanEntry> readEntries) {
- super((String)null);
+ public MockGroupScanPOP(@JsonProperty("url") String url,
+ @JsonProperty("extended") Boolean extended,
+ @JsonProperty("entries") List<MockScanEntry> readEntries) {
+ super((String) null);
this.readEntries = readEntries;
this.url = url;
+ this.extended = extended == null ? false : extended;
}
+ @Override
public ScanStats getScanStats() {
return ScanStats.TRIVIAL_TABLE;
}
@@ -71,22 +100,22 @@ public class MockGroupScanPOP extends AbstractGroupScan {
return readEntries;
}
- public static class MockScanEntry{
+ /**
+ * Describes one simulated file (or block) within the logical file scan
+ * described by this group scan. Each block can have a distinct schema to test
+ * for schema changes.
+ */
+
+ public static class MockScanEntry {
private final int records;
private final MockColumn[] types;
- private final int recordSize;
-
@JsonCreator
- public MockScanEntry(@JsonProperty("records") int records, @JsonProperty("types") MockColumn[] types) {
+ public MockScanEntry(@JsonProperty("records") int records,
+ @JsonProperty("types") MockColumn[] types) {
this.records = records;
this.types = types;
- int size = 0;
- for (MockColumn dt : types) {
- size += TypeHelper.getSize(dt.getMajorType());
- }
- this.recordSize = size;
}
public int getRecords() {
@@ -99,50 +128,110 @@ public class MockGroupScanPOP extends AbstractGroupScan {
@Override
public String toString() {
- return "MockScanEntry [records=" + records + ", columns=" + Arrays.toString(types) + "]";
+ return "MockScanEntry [records=" + records + ", columns="
+ + Arrays.toString(types) + "]";
}
}
+ /**
+ * Meta-data description of the columns we wish to create during a simulated
+ * scan.
+ */
+
@JsonInclude(Include.NON_NULL)
- public static class MockColumn{
- @JsonProperty("type") public MinorType minorType;
+ public static class MockColumn {
+
+ /**
+ * Column type given as a Drill minor type (that is, a type without the
+ * extra information such as cardinality, width, etc.)
+ */
+
+ @JsonProperty("type")
+ public MinorType minorType;
public String name;
public DataMode mode;
public Integer width;
public Integer precision;
public Integer scale;
+ /**
+ * The scan can request to use a specific data generator class. The name of
+ * that class appears here. The name can be a simple class name, if that
+ * class resides in this Java package. Or, it can be a fully qualified name
+ * of a class that resides elsewhere. If null, the default generator for the
+ * data type is used.
+ */
+
+ public String generator;
+
+ /**
+ * Some tests want to create a very wide row with many columns. This field
+ * eases that task: specify a value other than 1 and the data source will
+ * generate that many copies of the column, each with separately generated
+ * random values. For example, to create 20 copies of field, "foo", set
+ * repeat to 20 and the actual generated batches will contain fields
+ * foo1, foo2, ... foo20.
+ */
+
+ public Integer repeat;
@JsonCreator
- public MockColumn(@JsonProperty("name") String name, @JsonProperty("type") MinorType minorType, @JsonProperty("mode") DataMode mode, @JsonProperty("width") Integer width, @JsonProperty("precision") Integer precision, @JsonProperty("scale") Integer scale) {
+ public MockColumn(@JsonProperty("name") String name,
+ @JsonProperty("type") MinorType minorType,
+ @JsonProperty("mode") DataMode mode,
+ @JsonProperty("width") Integer width,
+ @JsonProperty("precision") Integer precision,
+ @JsonProperty("scale") Integer scale,
+ @JsonProperty("generator") String generator,
+ @JsonProperty("repeat") Integer repeat) {
this.name = name;
this.minorType = minorType;
this.mode = mode;
this.width = width;
this.precision = precision;
this.scale = scale;
+ this.generator = generator;
+ this.repeat = repeat;
}
@JsonProperty("type")
public MinorType getMinorType() {
return minorType;
}
+
public String getName() {
return name;
}
+
public DataMode getMode() {
return mode;
}
+
public Integer getWidth() {
return width;
}
+
public Integer getPrecision() {
return precision;
}
+
public Integer getScale() {
return scale;
}
+ public String getGenerator() {
+ return generator;
+ }
+
+ public Integer getRepeat() {
+ return repeat;
+ }
+
+ @JsonIgnore
+ public int getRepeatCount() {
+ return repeat == null ? 1 : repeat;
+ }
+
@JsonIgnore
public MajorType getMajorType() {
MajorType.Builder b = MajorType.newBuilder();
@@ -162,9 +251,9 @@ public class MockGroupScanPOP extends AbstractGroupScan {
@Override
public String toString() {
- return "MockColumn [minorType=" + minorType + ", name=" + name + ", mode=" + mode + "]";
+ return "MockColumn [minorType=" + minorType + ", name=" + name + ", mode="
+ + mode + "]";
}
-
}
@SuppressWarnings("unchecked")
@@ -174,7 +263,7 @@ public class MockGroupScanPOP extends AbstractGroupScan {
mappings = new LinkedList[endpoints.size()];
- int i =0;
+ int i = 0;
for (MockScanEntry e : this.getReadEntries()) {
if (i == endpoints.size()) {
i -= endpoints.size();
@@ -191,8 +280,10 @@ public class MockGroupScanPOP extends AbstractGroupScan {
@Override
public SubScan getSpecificScan(int minorFragmentId) {
- assert minorFragmentId < mappings.length : String.format("Mappings length [%d] should be longer than minor fragment id [%d] but it isn't.", mappings.length, minorFragmentId);
- return new MockSubScanPOP(url, mappings[minorFragmentId]);
+ assert minorFragmentId < mappings.length : String.format(
+ "Mappings length [%d] should be longer than minor fragment id [%d] but it isn't.",
+ mappings.length, minorFragmentId);
+ return new MockSubScanPOP(url, extended, mappings[minorFragmentId]);
}
@Override
@@ -204,13 +295,60 @@ public class MockGroupScanPOP extends AbstractGroupScan {
@JsonIgnore
public PhysicalOperator getNewWithChildren(List<PhysicalOperator> children) {
Preconditions.checkArgument(children.isEmpty());
- return new MockGroupScanPOP(url, readEntries);
-
+ return new MockGroupScanPOP(url, extended, readEntries);
}
@Override
public GroupScan clone(List<SchemaPath> columns) {
- return this;
+ if (columns.isEmpty()) {
+ throw new IllegalArgumentException("No columns for mock scan");
+ }
+ List<MockColumn> mockCols = new ArrayList<>();
+ Pattern p = Pattern.compile("(\\w+)_([isd])(\\d*)");
+ for (SchemaPath path : columns) {
+ String col = path.getLastSegment().getNameSegment().getPath();
+ if (col.equals("*")) {
+ return this;
+ }
+ Matcher m = p.matcher(col);
+ if (!m.matches()) {
+ throw new IllegalArgumentException(
+ "Badly formatted mock column name: " + col);
+ }
+ @SuppressWarnings("unused")
+ String name = m.group(1);
+ String type = m.group(2);
+ String length = m.group(3);
+ int width = 10;
+ if (!length.isEmpty()) {
+ width = Integer.parseInt(length);
+ }
+ MinorType minorType;
+ switch (type) {
+ case "i":
+ minorType = MinorType.INT;
+ break;
+ case "s":
+ minorType = MinorType.VARCHAR;
+ break;
+ case "d":
+ minorType = MinorType.FLOAT8;
+ break;
+ default:
+ throw new IllegalArgumentException(
+ "Unsupported field type " + type + " for mock column " + col);
+ }
+ MockColumn mockCol = new MockColumn(col, minorType, DataMode.REQUIRED,
+ width, 0, 0, null, 1);
+ mockCols.add(mockCol);
+ }
+ MockScanEntry entry = readEntries.get(0);
+ MockColumn types[] = new MockColumn[mockCols.size()];
+ mockCols.toArray(types);
+ MockScanEntry newEntry = new MockScanEntry(entry.records, types);
+ List<MockScanEntry> newEntries = new ArrayList<>();
+ newEntries.add(newEntry);
+ return new MockGroupScanPOP(url, true, newEntries);
}
@Override
@@ -220,8 +358,13 @@ public class MockGroupScanPOP extends AbstractGroupScan {
@Override
public String toString() {
- return "MockGroupScanPOP [url=" + url
- + ", readEntries=" + readEntries + "]";
+ return "MockGroupScanPOP [url=" + url + ", readEntries=" + readEntries
+ + "]";
}
+ @Override
+ @JsonIgnore
+ public boolean canPushdownProjects(List<SchemaPath> columns) {
+ return true;
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
index ed3decb..6f8cb39 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockRecordReader.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -20,7 +20,6 @@ package org.apache.drill.exec.store.mock;
import java.util.Map;
import org.apache.drill.common.exceptions.ExecutionSetupException;
-import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.exception.SchemaChangeException;
@@ -43,9 +42,9 @@ public class MockRecordReader extends AbstractRecordReader {
private ValueVector[] valueVectors;
private int recordsRead;
private int batchRecordCount;
+ @SuppressWarnings("unused")
private OperatorContext operatorContext;
-
public MockRecordReader(FragmentContext context, MockScanEntry config) {
this.context = context;
this.config = config;
@@ -111,6 +110,5 @@ public class MockRecordReader extends AbstractRecordReader {
}
@Override
- public void close() {
- }
+ public void close() { }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
index 6cdbc3c..9cdb7ad 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockScanBatchCreator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -40,7 +40,11 @@ public class MockScanBatchCreator implements BatchCreator<MockSubScanPOP> {
final List<MockScanEntry> entries = config.getReadEntries();
final List<RecordReader> readers = Lists.newArrayList();
for(final MockScanEntry e : entries) {
- readers.add(new MockRecordReader(context, e));
+ if ( config.isExtended( ) ) {
+ readers.add(new ExtendedMockRecordReader(context, e));
+ } else {
+ readers.add(new MockRecordReader(context, e));
+ }
}
return new ScanBatch(config, context, readers.iterator());
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
index d68fd52..df8ee50 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -19,44 +19,64 @@ package org.apache.drill.exec.store.mock;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.calcite.schema.SchemaPlus;
-
+import org.apache.calcite.schema.Table;
import org.apache.drill.common.JSONOptions;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
+import org.apache.drill.exec.planner.logical.DynamicDrillTable;
import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.AbstractSchema;
import org.apache.drill.exec.store.AbstractStoragePlugin;
import org.apache.drill.exec.store.SchemaConfig;
import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockScanEntry;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
public class MockStorageEngine extends AbstractStoragePlugin {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockStorageEngine.class);
private final MockStorageEngineConfig configuration;
+ private final MockSchema schema;
public MockStorageEngine(MockStorageEngineConfig configuration, DrillbitContext context, String name) {
this.configuration = configuration;
+ this.schema = new MockSchema(this);
}
@Override
public AbstractGroupScan getPhysicalScan(String userName, JSONOptions selection, List<SchemaPath> columns)
throws IOException {
- ArrayList<MockScanEntry> readEntries = selection.getListWith(new ObjectMapper(),
+ List<MockScanEntry> readEntries = selection.getListWith(new ObjectMapper(),
new TypeReference<ArrayList<MockScanEntry>>() {
});
- return new MockGroupScanPOP(null, readEntries);
+ // The classic (logical-plan based) and extended (SQL-based) paths
+ // come through here. If this is a SQL query, then no columns are
+ // defined in the plan.
+
+ assert ! readEntries.isEmpty();
+ boolean extended = readEntries.size() == 1;
+ if (extended) {
+ MockScanEntry entry = readEntries.get(0);
+ extended = entry.getTypes() == null;
+ }
+ return new MockGroupScanPOP(null, extended, readEntries);
}
@Override
public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
+ parent.add(schema.getName(), schema);
}
@Override
@@ -64,5 +84,56 @@ public class MockStorageEngine extends AbstractStoragePlugin {
return configuration;
}
+ @Override
+ public boolean supportsRead() {
+ return true;
+ }
+
+// public static class ImplicitTable extends DynamicDrillTable {
+//
+// public ImplicitTable(StoragePlugin plugin, String storageEngineName,
+// Object selection) {
+// super(plugin, storageEngineName, selection);
+// }
+//
+// }
+
+ private static class MockSchema extends AbstractSchema {
+ private MockStorageEngine engine;
+
+ public MockSchema(MockStorageEngine engine) {
+ super(ImmutableList.<String>of(), MockStorageEngineConfig.NAME);
+ this.engine = engine;
+ }
+
+ @Override
+ public Table getTable(String name) { // resolves "<name>_<rows>[k|m]" to a mock table
+ Pattern p = Pattern.compile("(\\w+)_(\\d+)(k|m)?", Pattern.CASE_INSENSITIVE);
+ Matcher m = p.matcher(name);
+ if (! m.matches()) {
+ return null; // name does not follow the mock table naming pattern
+ }
+ @SuppressWarnings("unused")
+ String baseName = m.group(1);
+ int n = Integer.parseInt(m.group(2));
+ String unit = m.group(3); // null when the optional k/m suffix is absent
+ if ("K".equalsIgnoreCase(unit)) { n *= 1000; } // constant-first compare is null-safe
+ else if ("M".equalsIgnoreCase(unit)) { n *= 1_000_000; }
+ MockScanEntry entry = new MockScanEntry(n, null);
+ List<MockScanEntry> list = new ArrayList<>();
+ list.add(entry);
+ return new DynamicDrillTable(engine, this.name, list);
+ }
+
+ @Override
+ public Set<String> getTableNames() {
+ return new HashSet<>();
+ }
+
+ @Override
+ public String getTypeName() {
+ return MockStorageEngineConfig.NAME;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
index 2f7ea18..f20ff45 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngineConfig.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -24,13 +24,12 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
@JsonTypeName(MockStorageEngineConfig.NAME)
-public class MockStorageEngineConfig extends StoragePluginConfigBase{
-
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockStorageEngineConfig.class);
+public class MockStorageEngineConfig extends StoragePluginConfigBase {
private String url;
public static final String NAME = "mock";
+ public static final MockStorageEngineConfig INSTANCE = new MockStorageEngineConfig("mock:///");
@JsonCreator
public MockStorageEngineConfig(@JsonProperty("url") String url) {
@@ -41,7 +40,6 @@ public class MockStorageEngineConfig extends StoragePluginConfigBase{
return url;
}
-
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -64,5 +62,4 @@ public class MockStorageEngineConfig extends StoragePluginConfigBase{
public int hashCode() {
return url != null ? url.hashCode() : 0;
}
-
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
index 4c12d57..9fee5c7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorePOP.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -39,6 +39,7 @@ public class MockStorePOP extends AbstractStore {
super(child);
}
+ @Override
public int getMaxWidth() {
return 1;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
index 705452d..f169f51 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockSubScanPOP.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -18,7 +18,6 @@
package org.apache.drill.exec.store.mock;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import org.apache.drill.exec.physical.base.AbstractBase;
@@ -34,18 +33,42 @@ import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterators;
+/**
+ * Describes a physical scan operation for the mock data source. Each operator
+ * can, in general, give rise to one or more actual scans. For the mock data
+ * source, each sub-scan does exactly one (simulated) scan.
+ */
+
@JsonTypeName("mock-sub-scan")
public class MockSubScanPOP extends AbstractBase implements SubScan {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(MockGroupScanPOP.class);
private final String url;
protected final List<MockGroupScanPOP.MockScanEntry> readEntries;
-// private final OperatorCost cost;
-// private final Size size;
- private LinkedList<MockGroupScanPOP.MockScanEntry>[] mappings;
+ private final boolean extended;
+
+ /**
+ * This constructor is called from Jackson and is designed to support both
+ * older physical plans and the newer ("extended") plans. Jackson will fill
+ * in a null value for the <tt>extended</tt> field for older plans; we use
+ * that null value to know that the plan is old, thus not extended. Newer
+ * plans simply provide the value.
+ *
+ * @param url
+ * not used for the mock plan, appears to be a vestige of creating
+ * this from a file-based plugin. Must keep it because older physical
+ * plans contained a dummy URL value.
+ * @param extended
+ * see above
+ * @param readEntries
+ * a description of the columns to generate in a Jackson-serialized
+ * form unique to the mock data source plugin.
+ */
@JsonCreator
- public MockSubScanPOP(@JsonProperty("url") String url, @JsonProperty("entries") List<MockGroupScanPOP.MockScanEntry> readEntries) {
+ public MockSubScanPOP(@JsonProperty("url") String url,
+ @JsonProperty("extended") Boolean extended,
+ @JsonProperty("entries") List<MockGroupScanPOP.MockScanEntry> readEntries) {
this.readEntries = readEntries;
// OperatorCost cost = new OperatorCost(0,0,0,0);
// Size size = new Size(0,0);
@@ -56,11 +79,11 @@ public class MockSubScanPOP extends AbstractBase implements SubScan {
// this.cost = cost;
// this.size = size;
this.url = url;
+ this.extended = extended == null ? false : extended;
}
- public String getUrl() {
- return url;
- }
+ public String getUrl() { return url; }
+ public boolean isExtended() { return extended; }
@JsonProperty("entries")
public List<MockGroupScanPOP.MockScanEntry> getReadEntries() {
@@ -88,7 +111,7 @@ public class MockSubScanPOP extends AbstractBase implements SubScan {
@JsonIgnore
public PhysicalOperator getNewWithChildren(List<PhysicalOperator> children) {
Preconditions.checkArgument(children.isEmpty());
- return new MockSubScanPOP(url, readEntries);
+ return new MockSubScanPOP(url, extended, readEntries);
}
@@ -96,5 +119,4 @@ public class MockSubScanPOP extends AbstractBase implements SubScan {
public int getOperatorType() {
return CoreOperatorType.MOCK_SUB_SCAN_VALUE;
}
-
}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java
new file mode 100644
index 0000000..d4e2379
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MoneyGen.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.Float8Vector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Generates a mock money field as a double over the range 0
+ * to 1 million. Values include cents. That is the value
+ * ranges uniformly over the range 0.00 to
+ * 999,999.99.
+ */
+
+public class MoneyGen implements FieldGen {
+
+ private final Random rand = new Random();
+
+ @Override
+ public void setup(ColumnDef colDef) { }
+
+ private double value() {
+ return Math.ceil(rand.nextDouble() * 1_000_000 * 100) / 100;
+ }
+
+ @Override
+ public void setValue(ValueVector v, int index) {
+ Float8Vector vector = (Float8Vector) v;
+ vector.getMutator().set(index, value());
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java
new file mode 100644
index 0000000..72be10f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/StringGen.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.mock;
+
+import java.util.Random;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+
+/**
+ * Generates a mock string field of the given length. Fields are composed
+ * of upper case letters uniformly distributed from A to Z, and repeated
+ * for the length of the field. Example for a 4-character field:
+ * DDDD, MMMM, AAAA, RRRR, ...
+ */
+
+public class StringGen implements FieldGen {
+
+ private final Random rand = new Random();
+ private int length;
+
+ @Override
+ public void setup(ColumnDef colDef) {
+ length = colDef.width;
+ }
+
+ private String value() {
+ String c = Character.toString((char) (rand.nextInt(26) + 'A'));
+ StringBuilder buf = new StringBuilder();
+ for (int i = 0; i < length; i++) {
+ buf.append(c);
+ }
+ return buf.toString();
+ }
+
+ @Override
+ public void setValue(ValueVector v, int index) {
+ VarCharVector vector = (VarCharVector) v;
+ vector.getMutator().setSafe(index, value().getBytes());
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java
new file mode 100644
index 0000000..e99cfc5
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Defines a mock data source which generates dummy test data for use
+ * in testing. The data source operates in two modes:
+ * <ul>
+ * <li><b>Classic:</b> used in physical plans in many unit tests.
+ * The plan specifies a set of columns; data is generated by the
+ * vectors themselves based on two alternating values.</li>
+ * <li><b>Enhanced:</b> available for use in newer unit tests.
+ * Enhances the physical plan description to allow specifying a data
+ * generator class (for various types, data formats, etc.) Also
+ * provides a data storage engine framework to allow using mock
+ * tables in SQL queries.</li>
+ * </ul>
+ * <h3>Classic Mode</h3>
+ * Create a scan operator that looks like the following (from
+ * <tt></tt>):
+ * <pre><code>
+ * graph:[
+ * {
+ * {@literal @}id:1,
+ * pop:"mock-scan",
+ * url: "http://apache.org",
+ * entries:[
+ * {records: 1000000, types: [
+ * {name: "blue", type: "INT", mode: "REQUIRED"},
+ * {name: "green", type: "INT", mode: "REQUIRED"}
+ * ]}
+ * ]
+ * }, ...
+ * </code></pre>
+ * Here:
+ * <ul>
+ * <li>The <tt>pop</tt> must be <tt>mock-scan</tt>.</li>
+ * <li>The <tt>url</tt> is unused.</li>
+ * <li>The <tt>entries</tt> section can have one or more entries. If
+ * more than one entry, the storage engine will enable parallel scans
+ * up to the number of entries, as though each entry was a different
+ * file or group.</li>
+ * <li>The entry <tt>name</tt> is arbitrary, though color names seem
+ * to be the traditional names used in Drill tests.</li>
+ * <li>The <tt>type</tt> is one of the supported Drill
+ * {@link MinorType} names.</li>
+ * <li>The <tt>mode</tt> is one of the supported Drill
+ * {@link DataMode} names: usually <tt>OPTIONAL</tt> or <tt>REQUIRED</tt>.</li>
+ * </ul>
+ *
+ * <h3>Enhanced Mode</h3>
+ * Enhanced builds on the Classic mode to add additional capabilities.
+ * Enhanced mode can be used either in a physical plan or in SQL. Data
+ * is randomly generated over a wide range of values and can be
+ * controlled by custom generator classes. When
+ * in a physical plan, the <tt>records</tt> section has additional
+ * attributes as described in {@link MockGroupScanPOP.MockColumn}:
+ * <ul>
+ * <li>The <tt>generator</tt> lets you specify a class to generate the
+ * sample data. The rule for the class name is that it can either contain
+ * a full package path, or just a class name. If just a class name, the
+ * class is assumed to reside in this package. For example, to generate
+ * an ISO date into a string, use <tt>DateGen</tt>. Additional generators
+ * can (and should) be added as the need arises.</li>
+ * <li>The <tt>repeat</tt> attribute lets you create a very wide row by
+ * repeating a column the specified number of times. Actual column names
+ * have a numeric suffix. For example, if the base name is "blue" and
+ * is repeated twice, actual columns are "blue1" and "blue2".</li>
+ * </ul>
+ * When used in SQL, use the <tt>mock</tt> name space as follows:
+ * <pre><code>
+ * SELECT id_i, name_s50 FROM `mock`.`employee_500`;
+ * </code></pre>
+ * Both the column names and table names encode information that specifies
+ * what data to generate.
+ * <p>
+ * Columns are of the form <tt><i>name</i>_<i>type</i><i>length</i>?</tt>.
+ * <ul>
+ * <li>The name is anything you want ("id" and "name" in the example.)</li>
+ * <li>The underscore is required to separate the type from the name.</li>
+ * <li>The type is one of "i" (integer), "d" (double) or "s" (string).
+ * Other types can be added as needed: n (decimal number), l (long), etc.</li>
+ * <li>The length is optional and is used only for string (<tt>VARCHAR</tt>)
+ * columns. The default string length is 10.</li>
+ * <li>Columns do not yet support nulls. When they do, the encoding will
+ * be "_n<i>percent</i>" where the percent specifies the percent of rows
+ * that should contain null values in this column.</li>
+ * <li>The column is known to SQL as its full name, that is "id_i" or
+ * "name_s50".</li>
+ * </ul>
+ * <p>
+ * Tables are of the form <tt><i>name</i>_<i>rows</i><i>unit</i>?</tt> where:
+ * <ul>
+ * <li>The name is anything you want. ("employee" in the example.)</li>
+ * <li>The underscore is required to separate the row count from the name.</li>
+ * <li>The row count specifies the number of rows to return.</li>
+ * <li>The count unit can be none, K (multiply count by 1000) or M
+ * (multiply row count by one million), case insensitive.</li>
+ * <li>Another field (not yet implemented) might specify the split count.</li>
+ * </ul>
+ * <h3>Data Generators</h3>
+ * The classic mode uses data generators built into each vector to generate
+ * the sample data. These generators use a very simple black/white alternating
+ * series of two values. Simple, but limited. The enhanced mode allows custom
+ * data generators. Unfortunately, this requires a separate generator class for
+ * each data type. As a result, we presently support just a few key data types.
+ * On the other hand, the custom generators do allow tests to specify a custom
+ * generator class to generate the kind of data needed for that test.
+ * <p>
+ * All data generators implement the {@link FieldGen} interface, and must have
+ * a no-argument constructor to allow dynamic instantiation. The mock data
+ * source either picks a default generator (if no <tt>generator</tt> is provided)
+ * or uses the custom generator specified in <tt>generator</tt>. Generators
+ * are independent (though one could, perhaps, write generators that correlate
+ * field values.)
+ */
+package org.apache.drill.exec.store.mock;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java b/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
index f4fe2da..7237183 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/TestOpSerialization.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -47,7 +47,7 @@ public class TestOpSerialization {
public void testSerializedDeserialize() throws Throwable {
DrillConfig c = DrillConfig.create();
PhysicalPlanReader reader = PhysicalPlanReaderTestFactory.defaultPhysicalPlanReader(c);
- MockSubScanPOP s = new MockSubScanPOP("abc", null);
+ MockSubScanPOP s = new MockSubScanPOP("abc", false, null);
s.setOperatorId(3);
Filter f = new Filter(s, new ValueExpressions.BooleanExpression("true", ExpressionPosition.UNKNOWN), 0.1f);
f.setOperatorId(2);
http://git-wip-us.apache.org/repos/asf/drill/blob/535623bb/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
index 722d45e..e191d35 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -14,7 +14,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- ******************************************************************************/
+ */
package org.apache.drill.exec.fn.interp;
import static org.junit.Assert.assertEquals;
@@ -22,15 +22,10 @@ import static org.junit.Assert.assertEquals;
import java.nio.ByteBuffer;
import java.util.List;
-import org.antlr.runtime.ANTLRStringStream;
-import org.antlr.runtime.CommonTokenStream;
-import org.antlr.runtime.RecognitionException;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.ErrorCollector;
import org.apache.drill.common.expression.ErrorCollectorImpl;
import org.apache.drill.common.expression.LogicalExpression;
-import org.apache.drill.common.expression.parser.ExprLexer;
-import org.apache.drill.common.expression.parser.ExprParser;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.common.util.DrillStringUtils;
@@ -154,7 +149,9 @@ public class ExpressionInterpreterTest extends PopUnitTestBase {
}
protected void doTest(String expressionStr, String[] colNames, TypeProtos.MajorType[] colTypes, String[] expectFirstTwoValues, BitControl.PlanFragment planFragment) throws Exception {
+ @SuppressWarnings("resource")
final RemoteServiceSet serviceSet = RemoteServiceSet.getLocalServiceSet();
+ @SuppressWarnings("resource")
final Drillbit bit1 = new Drillbit(CONFIG, serviceSet);
bit1.run();
@@ -165,16 +162,18 @@ public class ExpressionInterpreterTest extends PopUnitTestBase {
final MockGroupScanPOP.MockColumn[] columns = new MockGroupScanPOP.MockColumn[colNames.length];
for (int i = 0; i < colNames.length; i++ ) {
- columns[i] = new MockGroupScanPOP.MockColumn(colNames[i], colTypes[i].getMinorType(), colTypes[i].getMode(),0,0,0);
+ columns[i] = new MockGroupScanPOP.MockColumn(colNames[i], colTypes[i].getMinorType(), colTypes[i].getMode(), 0, 0, 0, null, null);
}
final MockGroupScanPOP.MockScanEntry entry = new MockGroupScanPOP.MockScanEntry(10, columns);
- final MockSubScanPOP scanPOP = new MockSubScanPOP("testTable", java.util.Collections.singletonList(entry));
+ final MockSubScanPOP scanPOP = new MockSubScanPOP("testTable", false, java.util.Collections.singletonList(entry));
+ @SuppressWarnings("resource")
final ScanBatch batch = createMockScanBatch(bit1, scanPOP, planFragment);
batch.next();
+ @SuppressWarnings("resource")
final ValueVector vv = evalExprWithInterpreter(expressionStr, batch, bit1);
// Verify the first 2 values in the output of evaluation.
@@ -190,6 +189,7 @@ public class ExpressionInterpreterTest extends PopUnitTestBase {
bit1.close();
}
+ @SuppressWarnings("resource")
private ScanBatch createMockScanBatch(Drillbit bit, MockSubScanPOP scanPOP, BitControl.PlanFragment planFragment) {
final List<RecordBatch> children = Lists.newArrayList();
final MockScanBatchCreator creator = new MockScanBatchCreator();
[2/4] drill git commit: DRILL-5105: comment out unecessary recursive
buffer size check
Posted by pa...@apache.org.
DRILL-5105: comment out unecessary recursive buffer size check
This closes #715
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/77e50100
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/77e50100
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/77e50100
Branch: refs/heads/master
Commit: 77e501005bbfe1d5736feab78d8d6e2f495eb8a3
Parents: eef3b3f
Author: chunhui-shi <cs...@maprtech.com>
Authored: Tue Jan 3 17:39:49 2017 -0800
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:44:40 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/drill/exec/vector/complex/MapVector.java | 7 +++----
.../apache/drill/exec/vector/complex/RepeatedMapVector.java | 7 +++----
2 files changed, 6 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/77e50100/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
index e76e674..8447893 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
@@ -134,10 +134,9 @@ public class MapVector extends AbstractMapVector {
@Override
public DrillBuf[] getBuffers(boolean clear) {
- int expectedSize = getBufferSize();
- int actualSize = super.getBufferSize();
-
- Preconditions.checkArgument(expectedSize == actualSize);
+ //int expectedSize = getBufferSize();
+ //int actualSize = super.getBufferSize();
+ //Preconditions.checkArgument(expectedSize == actualSize);
return super.getBuffers(clear);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/77e50100/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
index 9839b2e..94cf4a6 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
@@ -413,10 +413,9 @@ public class RepeatedMapVector extends AbstractMapVector
@Override
public DrillBuf[] getBuffers(boolean clear) {
- final int expectedBufferSize = getBufferSize();
- final int actualBufferSize = super.getBufferSize();
-
- Preconditions.checkArgument(expectedBufferSize == actualBufferSize + offsets.getBufferSize());
+ //final int expectedBufferSize = getBufferSize();
+ //final int actualBufferSize = super.getBufferSize();
+ //Preconditions.checkArgument(expectedBufferSize == actualBufferSize + offsets.getBufferSize());
return ArrayUtils.addAll(offsets.getBuffers(clear), super.getBuffers(clear));
}
[4/4] drill git commit: DRILL-4919: Fix select count(1) / count(*) on
csv with header
Posted by pa...@apache.org.
DRILL-4919: Fix select count(1) / count(*) on csv with header
This closes #714
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34969583
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34969583
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34969583
Branch: refs/heads/master
Commit: 34969583bfab410c80cb14a1c20249f097d5f7a7
Parents: 535623b
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Thu Dec 29 15:42:53 2016 +0000
Committer: Parth Chandra <pa...@apache.org>
Committed: Fri Jan 13 17:46:13 2017 -0800
----------------------------------------------------------------------
.../compliant/CompliantTextRecordReader.java | 18 +++++++++++++++-
.../drill/exec/store/text/TestCsvHeader.java | 22 ++++++++++++++++++--
2 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index d324270..ac4abb9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.easy.text.compliant;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.univocity.parsers.common.TextParsingException;
import io.netty.buffer.DrillBuf;
@@ -51,8 +52,12 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CompliantTextRecordReader.class);
private static final int MAX_RECORDS_PER_BATCH = 8096;
- static final int READ_BUFFER = 1024*1024;
+ private static final int READ_BUFFER = 1024*1024;
private static final int WHITE_SPACE_BUFFER = 64*1024;
+ // When no named column is required, ask SCAN to return a DEFAULT column.
+ // If such column does not exist, it will be returned as a nullable-int column.
+ private static final List<SchemaPath> DEFAULT_NAMED_TEXT_COLS_TO_READ =
+ ImmutableList.of(SchemaPath.getSimplePath("_DEFAULT_COL_TO_READ_"));
// settings to be used while parsing
private TextParsingSettings settings;
@@ -89,8 +94,19 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
return super.isStarQuery();
}
+ /**
+ * Returns list of default columns to read to replace empty list of columns.
+ * For text files without headers returns "columns[0]".
+ * Text files with headers do not support columns syntax,
+ * so when header extraction is enabled, returns fake named column "_DEFAULT_COL_TO_READ_".
+ *
+ * @return list of default columns to read
+ */
@Override
protected List<SchemaPath> getDefaultColumnsToRead() {
+ if (settings.isHeaderExtractionEnabled()) {
+ return DEFAULT_NAMED_TEXT_COLS_TO_READ;
+ }
return DEFAULT_TEXT_COLS_TO_READ;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
index a2e548b..cf54bb0 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.text;
+import com.google.common.collect.Lists;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.TestBuilder;
import org.apache.drill.common.util.FileUtils;
@@ -24,14 +25,14 @@ import org.apache.drill.common.util.FileUtils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
+import java.util.List;
import org.junit.Before;
import org.junit.Test;
public class TestCsvHeader extends BaseTestQuery{
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCsvHeader.class);
- String root;
+ private String root;
@Before
public void initialize() throws Exception {
@@ -185,4 +186,21 @@ public class TestCsvHeader extends BaseTestQuery{
}
builder.go();
}
+
+ @Test
+ public void testCountOnCsvWithHeader() throws Exception {
+ final String query = "select count(%s) as cnt from %s.`%s`";
+ final List<Object> options = Lists.<Object>newArrayList("*", 1, "'A'");
+
+ for (Object option : options) {
+ testBuilder()
+ .sqlQuery(query, option, TEMP_SCHEMA, root)
+ .unOrdered()
+ .baselineColumns("cnt")
+ .baselineValues(4L)
+ .build()
+ .run();
+ }
+ }
+
}