You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ja...@apache.org on 2015/04/29 09:30:53 UTC
[5/6] drill git commit: DRILL-2842: Fix reading of parquet files with
large file level metadata.
DRILL-2842: Fix reading of parquet files with large file level metadata.
Add result verification of the read operation. The actual bug happened when we were trying to read the metadata to make read assignments, but it's good to have the test case around to make sure that we can read a parquet file with a lot of columns.
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a7a60a24
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a7a60a24
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a7a60a24
Branch: refs/heads/master
Commit: a7a60a2455f3a0ec6ae9cfac12a132ca1a84cb66
Parents: 4a23037
Author: Jason Altekruse <al...@gmail.com>
Authored: Tue Apr 21 15:16:06 2015 -0700
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Wed Apr 29 00:28:44 2015 -0700
----------------------------------------------------------------------
.../exec/store/parquet/FooterGatherer.java | 2 +-
.../exec/fn/interp/TestConstantFolding.java | 52 ++++++++++++++++++--
.../physical/impl/writer/TestParquetWriter.java | 38 ++++++++++++++
3 files changed, 86 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/a7a60a24/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/FooterGatherer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/FooterGatherer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/FooterGatherer.java
index 0bb86e1..d8495c9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/FooterGatherer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/FooterGatherer.java
@@ -152,7 +152,7 @@ public class FooterGatherer {
footerBytes = new byte[size];
readFully(file, fileLength - size - FOOTER_METADATA_SIZE, footerBytes, 0, size - origFooterRead);
- System.arraycopy(origFooterBytes, 0, footerBytes, size - footerBytes.length, origFooterRead);
+ System.arraycopy(origFooterBytes, 0, footerBytes, size - origFooterRead, origFooterRead);
}else{
int start = footerBytes.length - (size + FOOTER_METADATA_SIZE);
footerBytes = ArrayUtils.subarray(footerBytes, start, start + size);
http://git-wip-us.apache.org/repos/asf/drill/blob/a7a60a24/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/TestConstantFolding.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/TestConstantFolding.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/TestConstantFolding.java
index b17935a..2c23df4 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/TestConstantFolding.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/TestConstantFolding.java
@@ -17,6 +17,9 @@
******************************************************************************/
package org.apache.drill.exec.fn.interp;
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
import org.apache.drill.PlanTestBase;
import org.apache.drill.exec.util.JsonStringArrayList;
import org.apache.hadoop.io.Text;
@@ -27,6 +30,7 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.PrintWriter;
+import java.util.List;
public class TestConstantFolding extends PlanTestBase {
@@ -41,34 +45,72 @@ public class TestConstantFolding extends PlanTestBase {
public static class SmallFileCreator {
private final TemporaryFolder folder;
+ private static final List<String> values = Lists.newArrayList("1","2","3");
+ private static final String jsonRecord = "{\"col1\" : 1,\"col2\" : 2, \"col3\" : 3}";
+ private String record;
public SmallFileCreator(TemporaryFolder folder) {
this.folder = folder;
+ this.record = null;
}
- public void createFiles(int smallFileLines, int bigFileLines) throws Exception{
+ public SmallFileCreator setRecord(String record) {
+ this.record = record;
+ return this;
+ }
+
+ public void createFiles(int smallFileLines, int bigFileLines, String extension, String delimiter) throws Exception{
+ if (record == null) {
+ if (extension.equals("csv") || extension.equals("tsv")) {
+ record = Joiner.on(delimiter).join(values);
+ } else if (extension.equals("json") ){
+ record = jsonRecord;
+ } else {
+ throw new UnsupportedOperationException(
+ String.format("Extension %s not supported by %s",
+ extension, SmallFileCreator.class.getSimpleName()));
+ }
+ }
PrintWriter out;
for (String fileAndFolderName : new String[]{"bigfile", "BIGFILE_2"}) {
File bigFolder = folder.newFolder(fileAndFolderName);
- File bigFile = new File (bigFolder, fileAndFolderName + ".csv");
+ File bigFile = new File (bigFolder, fileAndFolderName + "." + extension);
out = new PrintWriter(bigFile);
for (int i = 0; i < bigFileLines; i++ ) {
- out.println("1,2,3");
+ out.println(record);
}
out.close();
}
for (String fileAndFolderName : new String[]{"smallfile", "SMALLFILE_2"}) {
File smallFolder = folder.newFolder(fileAndFolderName);
- File smallFile = new File (smallFolder, fileAndFolderName + ".csv");
+ File smallFile = new File (smallFolder, fileAndFolderName + "." + extension);
out = new PrintWriter(smallFile);
for (int i = 0; i < smallFileLines; i++ ) {
- out.println("1,2,3");
+ out.println(record);
}
out.close();
}
}
+ public void createFiles(int smallFileLines, int bigFileLines, String extension) throws Exception{
+ String delimiter;
+ if (extension.equals("json")) {
+ delimiter = null;
+ } else if (extension.equals("csv")) {
+ delimiter = ",";
+ } else if (extension.equals("tsv")) {
+ delimiter = "\t";
+ } else {
+ throw new UnsupportedOperationException("Extension not recognized, please explicitly provide a delimiter.");
+ }
+ createFiles(smallFileLines, bigFileLines, extension, delimiter);
+ }
+
+ public void createFiles(int smallFileLines, int bigFileLines) throws Exception{
+ createFiles(smallFileLines, bigFileLines, "csv", ",");
+ }
+
}
@Test
http://git-wip-us.apache.org/repos/asf/drill/blob/a7a60a24/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 89837e7..5670e1e 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -23,6 +23,7 @@ import java.sql.Date;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.exec.fn.interp.TestConstantFolding;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -30,11 +31,15 @@ import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
public class TestParquetWriter extends BaseTestQuery {
// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestParquetWriter.class);
+ @Rule
+ public TemporaryFolder folder = new TemporaryFolder();
static FileSystem fs;
@BeforeClass
@@ -53,6 +58,39 @@ public class TestParquetWriter extends BaseTestQuery {
}
@Test
+ public void testLargeFooter() throws Exception {
+ StringBuffer sb = new StringBuffer();
+ // create a JSON document with a lot of columns
+ sb.append("{");
+ final int numCols = 1000;
+ String[] colNames = new String[numCols];
+ Object[] values = new Object[numCols];
+ for (int i = 0 ; i < numCols - 1; i++) {
+ sb.append(String.format("\"col_%d\" : 100,", i));
+ colNames[i] = "col_" + i;
+ values[i] = 100l;
+ }
+ // add one column without a comma after it
+ sb.append(String.format("\"col_%d\" : 100", numCols - 1));
+ sb.append("}");
+ colNames[numCols - 1] = "col_" + (numCols - 1);
+ values[numCols - 1] = 100l;
+
+ // write it to a file in the temp directory for the test
+ new TestConstantFolding.SmallFileCreator(folder).setRecord(sb.toString()).createFiles(1, 1, "json");
+
+ String path = folder.getRoot().toPath().toString();
+ test("use dfs_test.tmp");
+ test("create table WIDE_PARQUET_TABLE_TestParquetWriter_testLargeFooter as select * from dfs.`" + path + "/smallfile/smallfile.json`");
+ testBuilder()
+ .sqlQuery("select * from dfs_test.tmp.WIDE_PARQUET_TABLE_TestParquetWriter_testLargeFooter")
+ .unOrdered()
+ .baselineColumns(colNames)
+ .baselineValues(values)
+ .build().run();
+ }
+
+ @Test
public void testComplex() throws Exception {
String selection = "*";
String inputTable = "cp.`donuts.json`";