You are viewing a plain-text version of this content; the canonical (HTML) version is available at the mailing-list archive's original link, which was not preserved in this extraction.
Posted to commits@drill.apache.org by su...@apache.org on 2016/11/09 17:55:49 UTC
[1/2] drill git commit: DRILL-5007: Dynamic UDF lazy-init does not work correctly in multi-node cluster
Repository: drill
Updated Branches:
refs/heads/master 5a4394245 -> 4b1902c04
DRILL-5007: Dynamic UDF lazy-init does not work correctly in multi-node cluster
closes #650
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e03507a4
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e03507a4
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e03507a4
Branch: refs/heads/master
Commit: e03507a48765f83464917319476cdbe3a9adec45
Parents: 5a43942
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Wed Nov 9 13:08:18 2016 +0200
Committer: Sudheesh Katkam <su...@apache.org>
Committed: Wed Nov 9 09:16:33 2016 -0800
----------------------------------------------------------------------
.../drill/exec/expr/fn/FunctionImplementationRegistry.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/e03507a4/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionImplementationRegistry.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionImplementationRegistry.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionImplementationRegistry.java
index ede255a..988a9f6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionImplementationRegistry.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionImplementationRegistry.java
@@ -157,7 +157,7 @@ public class FunctionImplementationRegistry implements FunctionLookupContext, Au
if (holder == null && retry) {
if (optionManager != null && optionManager.getOption(ExecConstants.DYNAMIC_UDF_SUPPORT_ENABLED).bool_val) {
if (loadRemoteFunctions(version.get())) {
- findDrillFunction(functionResolver, functionCall, false);
+ return findDrillFunction(functionResolver, functionCall, false);
}
}
}
@@ -200,7 +200,7 @@ public class FunctionImplementationRegistry implements FunctionLookupContext, Au
if (retry && optionManager != null && optionManager.getOption(ExecConstants.DYNAMIC_UDF_SUPPORT_ENABLED).bool_val) {
if (loadRemoteFunctions(version.get())) {
- findExactMatchingDrillFunction(name, argTypes, returnType, false);
+ return findExactMatchingDrillFunction(name, argTypes, returnType, false);
}
}
return null;
[2/2] drill git commit: DRILL-5009: Skip reading of empty row groups while reading Parquet metadata
Posted by su...@apache.org.
DRILL-5009: Skip reading of empty row groups while reading Parquet metadata
+ We will no longer attempt to scan such row groups.
closes #651
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/4b1902c0
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/4b1902c0
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/4b1902c0
Branch: refs/heads/master
Commit: 4b1902c042d3e8f426f54ec04b78813ac64aa120
Parents: e03507a
Author: Parth Chandra <pa...@apache.org>
Authored: Mon Nov 7 20:29:23 2016 -0800
Committer: Sudheesh Katkam <su...@apache.org>
Committed: Wed Nov 9 09:16:34 2016 -0800
----------------------------------------------------------------------
.../hive/HiveDrillNativeScanBatchCreator.java | 4 ++++
.../drill/exec/store/parquet/Metadata.java | 22 ++++++++++++++++++++
2 files changed, 26 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/4b1902c0/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveDrillNativeScanBatchCreator.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveDrillNativeScanBatchCreator.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveDrillNativeScanBatchCreator.java
index d78c620..4be2ced 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveDrillNativeScanBatchCreator.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveDrillNativeScanBatchCreator.java
@@ -119,6 +119,10 @@ public class HiveDrillNativeScanBatchCreator implements BatchCreator<HiveDrillNa
final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);
for(int rowGroupNum : rowGroupNums) {
+ //DRILL-5009 : Skip the row group if the row count is zero
+ if (parquetMetadata.getBlocks().get(rowGroupNum).getRowCount() == 0) {
+ continue;
+ }
// Drill has only ever written a single row group per file, only detect corruption
// in the first row group
ParquetReaderUtility.DateCorruptionStatus containsCorruptDates =
http://git-wip-us.apache.org/repos/asf/drill/blob/4b1902c0/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java
index ead0a8f..04a2476 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/Metadata.java
@@ -22,6 +22,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.Iterator;
+
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
@@ -64,10 +66,12 @@ import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.KeyDeserializer;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.module.afterburner.AfterburnerModule;
@@ -437,6 +441,11 @@ public class Metadata {
length += col.getTotalSize();
}
+ // DRILL-5009: Skip the RowGroup if it is empty
+ // Note we still read the schema even if there are no values in the RowGroup
+ if (rowGroup.getRowCount() == 0) {
+ continue;
+ }
RowGroupMetadata_v3 rowGroupMeta =
new RowGroupMetadata_v3(rowGroup.getStartingPos(), length, rowGroup.getRowCount(),
getHostAffinity(file, rowGroup.getStartingPos(), length), columnMetadataList);
@@ -566,6 +575,19 @@ public class Metadata {
(createMetaFilesRecursively(Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getLeft();
newMetadata = true;
}
+
+ // DRILL-5009: Remove the RowGroup if it is empty
+ List<? extends ParquetFileMetadata> files = parquetTableMetadata.getFiles();
+ for (ParquetFileMetadata file : files) {
+ List<? extends RowGroupMetadata> rowGroups = file.getRowGroups();
+ for (Iterator<? extends RowGroupMetadata> iter = rowGroups.iterator(); iter.hasNext(); ) {
+ RowGroupMetadata r = iter.next();
+ if (r.getRowCount() == 0) {
+ iter.remove();
+ }
+ }
+ }
+
}
if (newMetadata && metaContext != null) {