Posted to commits@hive.apache.org by pr...@apache.org on 2014/06/03 00:58:04 UTC
svn commit: r1599388 - in /hive/trunk:
ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
Author: prasanthj
Date: Mon Jun 2 22:58:03 2014
New Revision: 1599388
URL: http://svn.apache.org/r1599388
Log:
HIVE-7052: Optimize split calculation time (Rajesh Balamohan reviewed by Prasanth J)
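
For context, the shape of the change visible in the diff below: the old code called the Configuration-based ColumnProjectionUtils.appendReadColumns(newjob, ...) inside the per-partition loop of getSplits, doing a conf get, string concatenation, and conf set on every iteration; the new code accumulates the same strings in StringBuilders and writes the Configuration once. A minimal stand-alone sketch of that pattern (class name, loop bound, and column ids are invented for illustration; this is not part of the commit):

import org.apache.hadoop.conf.Configuration;

public class BatchedConfWrites {
  private static final String KEY = "hive.io.file.readcolumn.ids";

  public static void main(String[] args) {
    // Old pattern: one get + concat + set round trip per partition directory.
    Configuration confOld = new Configuration(false);
    for (int dir = 0; dir < 10_000; dir++) {
      String old = confOld.get(KEY, "");
      confOld.set(KEY, old.isEmpty() ? "0,2" : old + ",0,2");
    }

    // New pattern: accumulate in a StringBuilder, then a single set.
    Configuration confNew = new Configuration(false);
    StringBuilder buf = new StringBuilder(confNew.get(KEY, ""));
    for (int dir = 0; dir < 10_000; dir++) {
      if (buf.length() > 0) {
        buf.append(',');
      }
      buf.append("0,2");
    }
    confNew.set(KEY, buf.toString());
  }
}
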
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1599388&r1=1599387&r2=1599388&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Mon Jun 2 22:58:03 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -321,6 +322,12 @@ public class HiveInputFormat<K extends W
TableDesc currentTable = null;
TableScanOperator currentTableScan = null;
+ boolean pushDownProjection = false;
+ //Buffers to hold column projection pushdown information
+ StringBuilder readColumnsBuffer = new StringBuilder(newjob.
+ get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, ""));
+ StringBuilder readColumnNamesBuffer = new StringBuilder(newjob.
+ get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, ""));
// for each dir, get the InputFormat, and do getSplits.
for (Path dir : dirs) {
PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, dir);
@@ -336,9 +343,13 @@ public class HiveInputFormat<K extends W
Operator op = mrwork.getAliasToWork().get(aliases.get(0));
if ((op != null) && (op instanceof TableScanOperator)) {
tableScan = (TableScanOperator) op;
+ //Reset buffers to collect this table scan's projected columns
+ readColumnsBuffer.setLength(0);
+ readColumnNamesBuffer.setLength(0);
// push down projections.
- ColumnProjectionUtils.appendReadColumns(
- newjob, tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
+ ColumnProjectionUtils.appendReadColumns(readColumnsBuffer, readColumnNamesBuffer,
+ tableScan.getNeededColumnIDs(), tableScan.getNeededColumns());
+ pushDownProjection = true;
// push down filters
pushFilters(newjob, tableScan);
}
@@ -366,6 +377,13 @@ public class HiveInputFormat<K extends W
currentTable = table;
currentInputFormatClass = inputFormatClass;
}
+ if (pushDownProjection) {
+ newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+ newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColumnsBuffer.toString());
+ newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColumnNamesBuffer.toString());
+ LOG.info(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR + "=" + readColumnsBuffer.toString());
+ LOG.info(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR + "=" + readColumnNamesBuffer.toString());
+ }
if (dirs.length != 0) {
LOG.info("Generating splits");
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java?rev=1599388&r1=1599387&r2=1599388&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java Mon Jun 2 22:58:03 2014
@@ -103,6 +103,36 @@ public final class ColumnProjectionUtils
appendReadColumnNames(conf, names);
}
+ public static void appendReadColumns(
+ StringBuilder readColumnsBuffer, StringBuilder readColumnNamesBuffer, List<Integer> ids,
+ List<String> names) {
+ appendReadColumns(readColumnsBuffer, ids);
+ appendReadColumnNames(readColumnNamesBuffer, names);
+ }
+
+ public static void appendReadColumns(StringBuilder readColumnsBuffer, List<Integer> ids) {
+ String newConfStr = toReadColumnIDString(ids);
+ if (newConfStr == null) {
+ return;
+ }
+ if (readColumnsBuffer.length() > 0) {
+ readColumnsBuffer.append(StringUtils.COMMA_STR);
+ }
+ readColumnsBuffer.append(newConfStr);
+ }
+
+ private static void appendReadColumnNames(StringBuilder readColumnNamesBuffer, List<String> cols) {
+ boolean first = readColumnNamesBuffer.length() == 0;
+ for(String col: cols) {
+ if (first) {
+ first = false;
+ } else {
+ readColumnNamesBuffer.append(',');
+ }
+ readColumnNamesBuffer.append(col);
+ }
+ }
+
/**
* Returns an array of column ids (start from zero) which is set in the given
* parameter <tt>conf</tt>.
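
Assuming the corrected append logic above, the new public overload can be exercised stand-alone; successive appends are comma-separated, matching the strings the Configuration-based methods build up. A usage sketch (column ids and names invented):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class AppendReadColumnsDemo {
  public static void main(String[] args) {
    StringBuilder ids = new StringBuilder();
    StringBuilder names = new StringBuilder();

    // Two appends, e.g. for two table scans sharing the same buffers.
    ColumnProjectionUtils.appendReadColumns(ids, names,
        Arrays.asList(0, 2), Arrays.asList("key", "value"));
    ColumnProjectionUtils.appendReadColumns(ids, names,
        Arrays.asList(5), Arrays.asList("ts"));

    System.out.println(ids);   // 0,2,5
    System.out.println(names); // key,value,ts
  }
}
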