You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by su...@apache.org on 2016/12/13 00:40:11 UTC

[14/14] drill git commit: DRILL-5065: Optimize count(*) queries on MapR-DB JSON Tables

DRILL-5065: Optimize count(*) queries on MapR-DB JSON Tables

In MapR-DB v5.2.0, we enabled '_id'-only projection for JSON
tables. Hence, we can now optimize the following queries:

a. count(*) by projecting only the '_id' column.

b. '_id'-only projections, including count(_id).

Change the format plugin config parameter name.

Fix the setter of the config parameter `disableCountOptimization` for the drill-maprdb plugin.

closes #678


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/97da0199
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/97da0199
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/97da0199

Branch: refs/heads/master
Commit: 97da01995ecfd9a7d157ad183f0656cd15993f26
Parents: 94f3ba0
Author: Smidth Panchamia <sp...@mapr.com>
Authored: Mon Nov 28 13:59:34 2016 -0800
Committer: Sudheesh Katkam <su...@apache.org>
Committed: Mon Dec 12 15:59:59 2016 -0800

----------------------------------------------------------------------
 .../store/mapr/db/MapRDBFormatPluginConfig.java | 12 ++++
 .../mapr/db/json/MaprDBJsonRecordReader.java    | 64 ++++++++++++--------
 2 files changed, 51 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/97da0199/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
----------------------------------------------------------------------
diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
index 1bb07ed..8b89b78 100644
--- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
+++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBFormatPluginConfig.java
@@ -31,6 +31,7 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig {
   public boolean enablePushdown = true;
   public boolean ignoreSchemaChange = false;
   public boolean readAllNumbersAsDouble = false;
+  public boolean disableCountOptimization = false;
 
   @Override
   public int hashCode() {
@@ -48,6 +49,8 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig {
       return false;
     } else if (enablePushdown != other.enablePushdown) {
       return false;
+    } else if (disableCountOptimization != other.disableCountOptimization) {
+      return false;
     }
     return true;
   }
@@ -65,6 +68,15 @@ public class MapRDBFormatPluginConfig extends TableFormatPluginConfig {
     allTextMode = mode;
   }
 
+  @JsonProperty("disableCountOptimization")
+  public void setDisableCountOptimization(boolean mode) {
+    disableCountOptimization = mode;
+  }
+
+  public boolean shouldDisableCountOptimization() {
+    return disableCountOptimization;
+  }
+
   @JsonProperty("readAllNumbersAsDouble")
   public void setReadAllNumbersAsDouble(boolean read) {
     readAllNumbersAsDouble = read;

http://git-wip-us.apache.org/repos/asf/drill/blob/97da0199/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
----------------------------------------------------------------------
diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
index ee6b15d..3105bec 100644
--- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
+++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/MaprDBJsonRecordReader.java
@@ -18,6 +18,7 @@
 package org.apache.drill.exec.store.mapr.db.json;
 
 import static org.ojai.DocumentConstants.ID_KEY;
+import static org.ojai.DocumentConstants.ID_FIELD;
 
 import java.nio.ByteBuffer;
 import java.util.Collection;
@@ -93,6 +94,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader {
   private boolean disablePushdown;
   private final boolean allTextMode;
   private final boolean ignoreSchemaChange;
+  private final boolean disableCountOptimization;
 
   public MaprDBJsonRecordReader(MapRDBSubScanSpec subScanSpec,
       MapRDBFormatPluginConfig formatPluginConfig,
@@ -110,6 +112,7 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader {
       condition = com.mapr.db.impl.ConditionImpl.parseFrom(ByteBufs.wrap(serializedFilter));
     }
 
+    disableCountOptimization = formatPluginConfig.shouldDisableCountOptimization();
     setColumns(projectedColumns);
     unionEnabled = context.getOptions().getOption(ExecConstants.ENABLE_UNION_TYPE);
     readNumbersAsDouble = formatPluginConfig.isReadAllNumbersAsDouble();
@@ -121,36 +124,47 @@ public class MaprDBJsonRecordReader extends AbstractRecordReader {
   @Override
   protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
     Set<SchemaPath> transformed = Sets.newLinkedHashSet();
-    if (!isStarQuery() && !disablePushdown) {
-      Set<FieldPath> projectedFieldsSet = Sets.newTreeSet();
-      for (SchemaPath column : columns) {
-        if (column.getRootSegment().getPath().equalsIgnoreCase(ID_KEY)) {
-          /*
-           * we do not include _id field in the set of projected fields
-           * because the DB currently can not return a document if only
-           * the _id field was projected. This should really be fixed in
-           * the DB client (Bug 21708) to avoid transferring the entire
-           * document when only _id is requested.
-           */
-          // projectedFieldsList.add(ID_FIELD);
-          includeId = true;
-        } else {
-          projectedFieldsSet.add(getFieldPathForProjection(column));
+    if (disablePushdown) {
+      transformed.add(AbstractRecordReader.STAR_COLUMN);
+      includeId = true;
+      return transformed;
+    }
+
+    if (isStarQuery()) {
+      transformed.add(AbstractRecordReader.STAR_COLUMN);
+      includeId = true;
+      if (isSkipQuery()) {
+    	// `SELECT COUNT(*)` query
+    	if (!disableCountOptimization) {
+          projectedFields = new FieldPath[1];
+          projectedFields[0] = ID_FIELD;
         }
-        transformed.add(column);
       }
-      if (projectedFieldsSet.size() > 0) {
-        projectedFields = projectedFieldsSet.toArray(new FieldPath[projectedFieldsSet.size()]);
+      return transformed;
+    }
+
+    Set<FieldPath> projectedFieldsSet = Sets.newTreeSet();
+    for (SchemaPath column : columns) {
+      if (column.getRootSegment().getPath().equalsIgnoreCase(ID_KEY)) {
+        includeId = true;
+        if (!disableCountOptimization) {
+          projectedFieldsSet.add(ID_FIELD);
+        }
+      } else {
+        projectedFieldsSet.add(getFieldPathForProjection(column));
       }
-    } else {
-      transformed.add(AbstractRecordReader.STAR_COLUMN);
-      includeId = true;
+
+      transformed.add(column);
+    }
+
+    if (projectedFieldsSet.size() > 0) {
+      projectedFields = projectedFieldsSet.toArray(new FieldPath[projectedFieldsSet.size()]);
+    }
+
+    if (disableCountOptimization) {
+      idOnly = (projectedFields == null);
     }
 
-    /*
-     * (Bug 21708) if we are projecting only the id field, save that condition here.
-     */
-    idOnly = !isStarQuery() && (projectedFields == null);
     return transformed;
   }