You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2014/08/28 07:39:27 UTC

svn commit: r1621045 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: hooks/ReadEntity.java parse/SemanticAnalyzer.java

Author: szehon
Date: Thu Aug 28 05:39:27 2014
New Revision: 1621045

URL: http://svn.apache.org/r1621045
Log:
HIVE-7730: Extend ReadEntity to add accessed columns from query (Xiaomeng Huang via Szehon)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java?rev=1621045&r1=1621044&r2=1621045&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java Thu Aug 28 05:39:27 2014
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.ql.hooks;
 
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Set;
 
 import org.apache.hadoop.fs.Path;
@@ -49,7 +51,8 @@ public class ReadEntity extends Entity i
   // For views, the entities can be nested - by default, entities are at the top level
   private final Set<ReadEntity> parents = new HashSet<ReadEntity>();
 
-
+  // Columns of this entity that are accessed by the query; starts empty and is filled through getAccessedColumns()
+  private final List<String> accessedColumns = new ArrayList<String>();
 
   /**
    * For serialization only.
@@ -159,4 +162,8 @@ public class ReadEntity extends Entity i
   public void noLockNeeded() {
     needsLock = false;
   }
+
+  /**
+   * Returns the columns of this entity that the query accesses.
+   *
+   * The returned list is the entity's own backing list, not a copy, so
+   * elements added to it by a caller become part of this entity's state.
+   *
+   * @return the mutable list of accessed columns; never null
+   */
+  public List<String> getAccessedColumns() {
+    return this.accessedColumns;
+  }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1621045&r1=1621044&r2=1621045&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Aug 28 05:39:27 2014
@@ -9517,6 +9517,11 @@ public class SemanticAnalyzer extends Ba
 
     LOG.info("Completed plan generation");
 
+    // put accessed columns to readEntity
+    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
+      putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
+    }
+
     if (!ctx.getExplain()) {
       // if desired check we're not going over partition scan limits
       enforceScanLimits(pCtx, origFetchTask);
@@ -9525,6 +9530,26 @@ public class SemanticAnalyzer extends Ba
     return;
   }
 
+  /**
+   * Copies the per-table accessed columns recorded in {@code columnAccessInfo}
+   * onto the matching read entities in {@code inputs}.
+   *
+   * Only TABLE and PARTITION entities are considered; a partition is looked up
+   * under the complete name of its owning table. Entities whose table has no
+   * entry in the access map are left unchanged.
+   *
+   * @param inputs the read entities of the query
+   * @param columnAccessInfo source of the table-name to accessed-columns map
+   */
+  private void putAccessedColumnsToReadEntity(HashSet<ReadEntity> inputs, ColumnAccessInfo columnAccessInfo) {
+    Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap();
+    if (tableToColumnAccessMap == null || tableToColumnAccessMap.isEmpty()) {
+      return;
+    }
+    for (ReadEntity entity : inputs) {
+      String tableName;
+      switch (entity.getType()) {
+        case TABLE:
+          tableName = entity.getTable().getCompleteName();
+          break;
+        case PARTITION:
+          tableName = entity.getPartition().getTable().getCompleteName();
+          break;
+        default:
+          // other entity types are not keyed by table name in the access map
+          continue;
+      }
+      // Map.get returns null when the table has no entry; passing that null
+      // to addAll would throw NullPointerException, so skip such inputs.
+      List<String> columns = tableToColumnAccessMap.get(tableName);
+      if (columns != null) {
+        entity.getAccessedColumns().addAll(columns);
+      }
+    }
+  }
+
   private void enforceScanLimits(ParseContext pCtx, FetchTask fTask)
       throws SemanticException {
     int scanLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITTABLESCANPARTITION);