You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by js...@apache.org on 2015/03/27 00:51:45 UTC

[1/2] drill git commit: DRILL-2507: most TestParquetWriter tests aren't actually launching the test queries

Repository: drill
Updated Branches:
  refs/heads/master 9d92b8e31 -> ee94a37e5


DRILL-2507: most TestParquetWriter tests aren't actually launching the test queries


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d4285b2e
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d4285b2e
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d4285b2e

Branch: refs/heads/master
Commit: d4285b2e70ae2f6929076bbc8b97b4dae90cf1b8
Parents: 9d92b8e
Author: adeneche <ad...@gmail.com>
Authored: Thu Mar 19 20:46:01 2015 -0700
Committer: Jason Altekruse <al...@gmail.com>
Committed: Thu Mar 26 15:51:11 2015 -0700

----------------------------------------------------------------------
 .../physical/impl/writer/TestParquetWriter.java | 84 ++++++++++----------
 1 file changed, 41 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/d4285b2e/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 76328c6..288a295 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -17,41 +17,19 @@
  */
 package org.apache.drill.exec.physical.impl.writer;
 
-import static org.junit.Assert.assertEquals;
-
-import java.io.UnsupportedEncodingException;
-import java.lang.reflect.Array;
 import java.math.BigDecimal;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
 
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.exec.ExecConstants;
-import org.apache.drill.exec.HyperVectorValueIterator;
-import org.apache.drill.exec.exception.SchemaChangeException;
-import org.apache.drill.exec.proto.UserBitShared;
-import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.exec.record.HyperVectorWrapper;
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.record.RecordBatchLoader;
-import org.apache.drill.exec.record.VectorWrapper;
-import org.apache.drill.exec.rpc.user.QueryResultBatch;
-import org.apache.drill.exec.vector.ValueVector;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
 
 public class TestParquetWriter extends BaseTestQuery {
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestParquetWriter.class);
+//  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestParquetWriter.class);
 
   static FileSystem fs;
 
@@ -103,12 +81,17 @@ public class TestParquetWriter extends BaseTestQuery {
 
   @Test
   public void testTPCHReadWrite1_date_convertedType() throws Exception {
-    String selection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " +
+    try {
+      test("alter session set `%s` = false", ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING);
+      String selection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " +
         "L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE, cast(L_COMMITDATE as DATE) as L_COMMITDATE, cast(L_RECEIPTDATE as DATE) AS L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT";
-    String validationSelection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " +
+      String validationSelection = "L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY, L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, " +
         "L_RETURNFLAG, L_LINESTATUS, L_SHIPDATE,L_COMMITDATE ,L_RECEIPTDATE, L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT";
-    String inputTable = "cp.`tpch/lineitem.parquet`";
-    runTestAndValidate(selection, validationSelection, inputTable, "lineitem_parquet_converted");
+      String inputTable = "cp.`tpch/lineitem.parquet`";
+      runTestAndValidate(selection, validationSelection, inputTable, "lineitem_parquet_converted");
+    } finally {
+      test("alter session set `%s` = %b", ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING, ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING_VALIDATOR.getDefault().bool_val);
+    }
   }
 
   @Test
@@ -155,20 +138,26 @@ public class TestParquetWriter extends BaseTestQuery {
 
   @Test
   public void testTPCHReadWriteNoDictUncompressed() throws Exception {
-    test(String.format("alter session set `%s` = false;", ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
-    test(String.format("alter session set `%s` = 'none'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE));
-    String inputTable = "cp.`tpch/supplier.parquet`";
-    runTestAndValidate("*", "*", inputTable, "supplier_parquet_no_dict_uncompressed");
-    test(String.format("alter session set `%s` = true;", ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
-    test(String.format("alter session set `%s` = 'snappy'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE));
+    try {
+      test(String.format("alter session set `%s` = false", ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
+      test(String.format("alter session set `%s` = 'none'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE));
+      String inputTable = "cp.`tpch/supplier.parquet`";
+      runTestAndValidate("*", "*", inputTable, "supplier_parquet_no_dict_uncompressed");
+    } finally {
+      test(String.format("alter session set `%s` = %b", ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING, ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING_VALIDATOR.getDefault().bool_val));
+      test(String.format("alter session set `%s` = '%s'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE, ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR.getDefault().string_val));
+    }
   }
 
   @Test
   public void testTPCHReadWriteDictGzip() throws Exception {
-    test(String.format("alter session set `%s` = 'gzip'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE));
-    String inputTable = "cp.`tpch/supplier.parquet`";
-    runTestAndValidate("*", "*", inputTable, "supplier_parquet_dict_gzip");
-    test(String.format("alter session set `%s` = 'snappy'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE));
+    try {
+      test(String.format("alter session set `%s` = 'gzip'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE));
+      String inputTable = "cp.`tpch/supplier.parquet`";
+      runTestAndValidate("*", "*", inputTable, "supplier_parquet_dict_gzip");
+    } finally {
+      test(String.format("alter session set `%s` = '%s'", ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE, ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR.getDefault().string_val));
+    }
   }
 
   // working to create an exhaustive test of the format for this one. including all convertedTypes
@@ -251,28 +240,33 @@ public class TestParquetWriter extends BaseTestQuery {
 
   @Test //DRILL-2030
   public void testWriterWithStarAndExp() throws Exception {
-    String selection = " *, r_regionkey + 1";
-    String validateSelection = "r_regionkey, r_name, r_comment, r_regionkey + 1";
+    String selection = " *, r_regionkey + 1 r_regionkey2";
+    String validateSelection = "r_regionkey, r_name, r_comment, r_regionkey + 1 r_regionkey2";
     String inputTable = "cp.`tpch/region.parquet`";
     runTestAndValidate(selection, validateSelection, inputTable, "region_star_exp");
   }
 
   public void compareParquetReadersColumnar(String selection, String table) throws Exception {
     String query = "select " + selection + " from " + table;
-    testBuilder()
+
+    try {
+      testBuilder()
         .ordered()
         .sqlQuery(query)
         .optionSettingQueriesForTestQuery("alter system set `store.parquet.use_new_reader` = false")
         .sqlBaselineQuery(query)
         .optionSettingQueriesForBaseline("alter system set `store.parquet.use_new_reader` = true")
         .build().run();
-
+    } finally {
+      test("alter system set `%s` = %b", ExecConstants.PARQUET_NEW_RECORD_READER, ExecConstants.PARQUET_RECORD_READER_IMPLEMENTATION_VALIDATOR.getDefault().bool_val);
+    }
   }
 
   public void compareParquetReadersHyperVector(String selection, String table) throws Exception {
 
     String query = "select " + selection + " from " + table;
-    testBuilder()
+    try {
+      testBuilder()
         .ordered()
         .highPerformanceComparison()
         .sqlQuery(query)
@@ -280,6 +274,9 @@ public class TestParquetWriter extends BaseTestQuery {
         .sqlBaselineQuery(query)
         .optionSettingQueriesForBaseline("alter system set `store.parquet.use_new_reader` = true")
         .build().run();
+    } finally {
+      test("alter system set `%s` = %b", ExecConstants.PARQUET_NEW_RECORD_READER, ExecConstants.PARQUET_RECORD_READER_IMPLEMENTATION_VALIDATOR.getDefault().bool_val);
+    }
   }
 
   @Ignore
@@ -402,7 +399,8 @@ public class TestParquetWriter extends BaseTestQuery {
     testBuilder()
         .unOrdered()
         .sqlQuery(query)
-        .sqlBaselineQuery(validateQuery);
+        .sqlBaselineQuery(validateQuery)
+        .go();
   }
 
 }


[2/2] drill git commit: DRILL-2253: PART-2 Vectorized Parquet reader fails to read correctly against RLE Dictionary encoded DATE column

Posted by js...@apache.org.
DRILL-2253: PART-2 Vectorized Parquet reader fails to read correctly against RLE Dictionary encoded DATE column


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/ee94a37e
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/ee94a37e
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/ee94a37e

Branch: refs/heads/master
Commit: ee94a37e52848647bc2dd2efa57741993a1e754a
Parents: d4285b2
Author: adeneche <ad...@gmail.com>
Authored: Thu Feb 26 16:33:08 2015 -0800
Committer: Jason Altekruse <al...@gmail.com>
Committed: Thu Mar 26 15:53:26 2015 -0700

----------------------------------------------------------------------
 .../columnreaders/FixedByteAlignedReader.java   |  12 +++--
 .../parquet/columnreaders/TestDateReader.java   |  48 +++++++++++++++++++
 .../TestNullableFixedAlignedReaders.java        |  36 --------------
 .../resources/parquet/date_dictionary.parquet   | Bin 2008 -> 299 bytes
 .../resources/parquet/date_nodictionary.parquet | Bin 0 -> 289 bytes
 5 files changed, 56 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/ee94a37e/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/FixedByteAlignedReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/FixedByteAlignedReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/FixedByteAlignedReader.java
index f467f8c..c2af964 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/FixedByteAlignedReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/FixedByteAlignedReader.java
@@ -130,10 +130,14 @@ class FixedByteAlignedReader extends ColumnReader {
 
     @Override
     void addNext(int start, int index) {
-//      dateVector.getMutator().set(index, DateTimeUtils.fromJulianDay(
-//          NullableFixedByteAlignedReaders.NullableDateReader.readIntLittleEndian(bytebuf, start)
-      dateVector.getMutator().set(index, DateTimeUtils.fromJulianDay(readIntLittleEndian(bytebuf, start)
-              - ParquetOutputRecordWriter.JULIAN_DAY_EPOC - 0.5));
+      int intValue;
+      if (usingDictionary) {
+        intValue =  pageReader.dictionaryValueReader.readInteger();
+      } else {
+        intValue = readIntLittleEndian(bytebuf, start);
+      }
+
+      dateVector.getMutator().set(index, DateTimeUtils.fromJulianDay(intValue - ParquetOutputRecordWriter.JULIAN_DAY_EPOC - 0.5));
     }
 
     // copied out of parquet library, didn't want to deal with the uneeded throws statement they had declared

http://git-wip-us.apache.org/repos/asf/drill/blob/ee94a37e/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestDateReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestDateReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestDateReader.java
new file mode 100644
index 0000000..e95842c
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestDateReader.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet.columnreaders;
+
+import org.apache.drill.BaseTestQuery;
+import org.junit.Test;
+
+public class TestDateReader extends BaseTestQuery {
+
+  /**
+   * check if DateReader works well with dictionary encoding.
+   */
+  @Test
+  public void testDictionary() throws Exception {
+    // the file 'date_dictionary.parquet' contains two DATE columns, one optional and one required
+    // and uses the PLAIN_DICTIONARY encoding
+
+    // query parquet file. We shouldn't get any exception
+    testNoResult("SELECT * FROM cp.`parquet/date_dictionary.parquet`");
+  }
+
+  /**
+   * check if DateReader works well with plain encoding.
+   */
+  @Test
+  public void testNoDictionary() throws Exception {
+    // the file 'date_nodictionary.parquet' contains two DATE columns, one optional and one required
+    // and uses the PLAIN encoding
+
+    // query parquet file. We shouldn't get any exception
+    testNoResult("SELECT * FROM cp.`parquet/date_nodictionary.parquet`");
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/ee94a37e/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestNullableFixedAlignedReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestNullableFixedAlignedReaders.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestNullableFixedAlignedReaders.java
deleted file mode 100644
index 7e83482..0000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/columnreaders/TestNullableFixedAlignedReaders.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.store.parquet.columnreaders;
-
-import org.apache.drill.BaseTestQuery;
-import org.junit.Test;
-
-public class TestNullableFixedAlignedReaders extends BaseTestQuery {
-
-  /**
-   * check if NullableDateReader works well with dictionary encoding.
-   */
-  @Test
-  public void testNullableDateReaderWithDictionary() throws Exception {
-    // the file 'date_dictionary.parquet' contains one single DATE column with 600 rows and 290 distinct date values
-    // and uses the PLAIN_DICTIONARY encoder
-
-    // query parquet file. We shouldn't get any exception
-    testNoResult("SELECT * FROM cp.`parquet/date_dictionary.parquet`");
-  }
-}

http://git-wip-us.apache.org/repos/asf/drill/blob/ee94a37e/exec/java-exec/src/test/resources/parquet/date_dictionary.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/date_dictionary.parquet b/exec/java-exec/src/test/resources/parquet/date_dictionary.parquet
index e1adfc0..02c8b34 100644
Binary files a/exec/java-exec/src/test/resources/parquet/date_dictionary.parquet and b/exec/java-exec/src/test/resources/parquet/date_dictionary.parquet differ

http://git-wip-us.apache.org/repos/asf/drill/blob/ee94a37e/exec/java-exec/src/test/resources/parquet/date_nodictionary.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/date_nodictionary.parquet b/exec/java-exec/src/test/resources/parquet/date_nodictionary.parquet
new file mode 100644
index 0000000..45d28aa
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/date_nodictionary.parquet differ