You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2014/08/28 07:39:27 UTC
svn commit: r1621045 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql:
hooks/ReadEntity.java parse/SemanticAnalyzer.java
Author: szehon
Date: Thu Aug 28 05:39:27 2014
New Revision: 1621045
URL: http://svn.apache.org/r1621045
Log:
HIVE-7730: Extend ReadEntity to add accessed columns from query (Xiaomeng Huang via Szehon)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java?rev=1621045&r1=1621044&r2=1621045&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadEntity.java Thu Aug 28 05:39:27 2014
@@ -19,7 +19,9 @@
package org.apache.hadoop.hive.ql.hooks;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.HashSet;
+import java.util.List;
import java.util.Set;
import org.apache.hadoop.fs.Path;
@@ -49,7 +51,8 @@ public class ReadEntity extends Entity i
// For views, the entities can be nested - by default, entities are at the top level
private final Set<ReadEntity> parents = new HashSet<ReadEntity>();
-
+ // The accessed columns of the query
+ private final List<String> accessedColumns = new ArrayList<String>();
/**
* For serialization only.
@@ -159,4 +162,8 @@ public class ReadEntity extends Entity i
public void noLockNeeded() {
needsLock = false;
}
+
+ public List<String> getAccessedColumns() {
+ return accessedColumns;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1621045&r1=1621044&r2=1621045&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Aug 28 05:39:27 2014
@@ -9517,6 +9517,11 @@ public class SemanticAnalyzer extends Ba
LOG.info("Completed plan generation");
+ // put accessed columns to readEntity
+ if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
+ putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
+ }
+
if (!ctx.getExplain()) {
// if desired check we're not going over partition scan limits
enforceScanLimits(pCtx, origFetchTask);
@@ -9525,6 +9530,26 @@ public class SemanticAnalyzer extends Ba
return;
}
+ private void putAccessedColumnsToReadEntity(HashSet<ReadEntity> inputs, ColumnAccessInfo columnAccessInfo) {
+ Map<String, List<String>> tableToColumnAccessMap = columnAccessInfo.getTableToColumnAccessMap();
+ if (tableToColumnAccessMap != null && !tableToColumnAccessMap.isEmpty()) {
+ for(ReadEntity entity: inputs) {
+ switch (entity.getType()) {
+ case TABLE:
+ entity.getAccessedColumns().addAll(
+ tableToColumnAccessMap.get(entity.getTable().getCompleteName()));
+ break;
+ case PARTITION:
+ entity.getAccessedColumns().addAll(
+ tableToColumnAccessMap.get(entity.getPartition().getTable().getCompleteName()));
+ break;
+ default:
+ // no-op
+ }
+ }
+ }
+ }
+
private void enforceScanLimits(ParseContext pCtx, FetchTask fTask)
throws SemanticException {
int scanLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITTABLESCANPARTITION);