Posted to commits@hive.apache.org by jd...@apache.org on 2016/05/06 17:25:14 UTC
[24/39] hive git commit: HIVE-13594: Misc cleanup on llap branch
HIVE-13594: Misc cleanup on llap branch
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/53b43cd4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/53b43cd4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/53b43cd4
Branch: refs/heads/master
Commit: 53b43cd440a2ee32efa5ad62f2684a4578f390ca
Parents: e69bd1e
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri Apr 22 14:33:36 2016 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri Apr 22 14:33:36 2016 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 3 --
.../test/resources/testconfiguration.properties | 3 +-
jdbc/pom.xml | 16 +++----
.../hadoop/hive/llap/LlapBaseRecordReader.java | 2 +-
.../ext/LlapTaskUmbilicalExternalClient.java | 4 +-
.../org/apache/hadoop/hive/llap/LlapDump.java | 3 +-
.../hadoop/hive/ql/exec/FileSinkOperator.java | 3 --
.../hive/ql/exec/SerializationUtilities.java | 2 +-
.../hive/ql/exec/tez/HiveSplitGenerator.java | 23 ++++-----
.../hive/ql/optimizer/SimpleFetchOptimizer.java | 50 +++++++++++---------
.../ql/udf/generic/GenericUDTFGetSplits.java | 6 +--
11 files changed, 51 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index fa724ae..c8c26db 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2763,9 +2763,6 @@ public class HiveConf extends Configuration {
LLAP_DAEMON_OUTPUT_SERVICE_PORT("hive.llap.daemon.output.service.port", 15003,
"LLAP daemon output service port"),
- LLAP_TMP_SUBMITWORK_USING_TEZ_AM("hive.llap.tmp.submit.work.using.tez.am", true,""),
- LLAP_TMP_EXT_CLIENT_NUM_SERVER_HANDLERS("hive.llap.tmp.ext.client.num.server.handlers", 1, ""),
-
SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout",
"60s", new TimeValidator(TimeUnit.SECONDS),
"Timeout for requests from Hive client to remote Spark driver."),
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1669b9c..e46e6ce 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -503,8 +503,7 @@ minillap.shared.query.files=bucket_map_join_tez1.q,\
vectorized_dynamic_partition_pruning.q,\
tez_multi_union.q,\
tez_join.q,\
- tez_union_multiinsert.q,\
- udtf_get_splits.q
+ tez_union_multiinsert.q
minillap.query.files=llap_udf.q
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/jdbc/pom.xml b/jdbc/pom.xml
index c99a351..f87ab59 100644
--- a/jdbc/pom.xml
+++ b/jdbc/pom.xml
@@ -42,13 +42,14 @@
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
@@ -70,11 +71,6 @@
<artifactId>hive-service-rpc</artifactId>
<version>${project.version}</version>
</dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
- <artifactId>hive-llap-common</artifactId>
- <version>${project.version}</version>
- </dependency>
<!-- inter-project -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
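The net effect of the pom change above: hive-jdbc drops its direct hive-exec and hive-llap-common dependencies and additionally excludes hive-exec from hive-service, so it no longer appears transitively either. Assuming a standard Maven setup, this can be checked from the jdbc module with

  mvn dependency:tree -Dincludes=org.apache.hive:hive-exec

which should report no matching artifacts after this commit.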
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/llap-client/src/java/org/apache/hadoop/hive/llap/LlapBaseRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapBaseRecordReader.java b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapBaseRecordReader.java
index 7073280..0cd9672 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapBaseRecordReader.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapBaseRecordReader.java
@@ -191,7 +191,7 @@ public class LlapBaseRecordReader<V extends WritableComparable> implements Recor
ReaderEvent event = readerEvents.take();
return event;
} catch (InterruptedException ie) {
- throw new RuntimeException("Interrupted while getting readerEvents, not expected", ie);
+ throw new RuntimeException("Interrupted while getting readerEvents, not expected: " + ie.getMessage(), ie);
}
}
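For context, the method patched above blocks on a queue of reader events and treats interruption as an unexpected error. A self-contained sketch of the same take-and-wrap pattern (class and field names are illustrative, not the actual reader):

  import java.util.concurrent.BlockingQueue;
  import java.util.concurrent.LinkedBlockingQueue;

  public class EventQueueSketch {
    // Illustrative stand-in for the reader's event type.
    static class ReaderEvent {}

    private final BlockingQueue<ReaderEvent> readerEvents = new LinkedBlockingQueue<>();

    public ReaderEvent getReaderEvent() {
      try {
        // take() blocks until an event arrives.
        return readerEvents.take();
      } catch (InterruptedException ie) {
        // As in the patched line: include the interrupt's message in the error.
        throw new RuntimeException(
            "Interrupted while getting readerEvents, not expected: " + ie.getMessage(), ie);
      }
    }
  }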
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/llap-client/src/java/org/apache/hadoop/hive/llap/ext/LlapTaskUmbilicalExternalClient.java
----------------------------------------------------------------------
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/ext/LlapTaskUmbilicalExternalClient.java b/llap-client/src/java/org/apache/hadoop/hive/llap/ext/LlapTaskUmbilicalExternalClient.java
index 7d06637..8598bc8 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/ext/LlapTaskUmbilicalExternalClient.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/ext/LlapTaskUmbilicalExternalClient.java
@@ -102,8 +102,8 @@ public class LlapTaskUmbilicalExternalClient extends AbstractService {
@Override
public void serviceStart() throws IOException {
- int numHandlers = HiveConf.getIntVar(conf,
- HiveConf.ConfVars.LLAP_TMP_EXT_CLIENT_NUM_SERVER_HANDLERS);
+ // If a single server is used for multiple external clients, consider using more than one handler.
+ int numHandlers = 1;
llapTaskUmbilicalServer = new LlapTaskUmbilicalServer(conf, umbilical, numHandlers, tokenIdentifier, sessionToken);
communicator.start();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapDump.java
----------------------------------------------------------------------
diff --git a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapDump.java b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapDump.java
index 1c4397f..d485bfa 100644
--- a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapDump.java
+++ b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapDump.java
@@ -40,7 +40,6 @@ import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.io.RCFile.Reader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
@@ -74,7 +73,7 @@ public class LlapDump {
if (cli.hasOption('h')) {
HelpFormatter formatter = new HelpFormatter();
- formatter.printHelp("orcfiledump", opts);
+ formatter.printHelp("llapdump", opts);
return;
}
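The help-text fix above corrects a usage string that appears to have been copied from orcfiledump's option handling. A minimal Commons CLI sketch of the corrected output (the option set here is illustrative):

  import org.apache.commons.cli.HelpFormatter;
  import org.apache.commons.cli.Option;
  import org.apache.commons.cli.Options;

  public class HelpSketch {
    public static void main(String[] args) {
      Options opts = new Options();
      opts.addOption(new Option("h", "help", false, "print this message"));
      // Prints "usage: llapdump" followed by the option descriptions.
      new HelpFormatter().printHelp("llapdump", opts);
    }
  }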
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index d2dfbb7..ec6381b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -36,13 +36,11 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.llap.LlapOutputFormat;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -203,7 +201,6 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
}
}
}
-
try {
for (int i = 0; i < updaters.length; i++) {
if (updaters[i] != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
index eaa4293..b05a79e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
@@ -570,7 +570,7 @@ public class SerializationUtilities {
* @param plan Usually of type MapredWork, MapredLocalWork etc.
* @param out stream in which serialized plan is written into
*/
- public static void serializeObjectByKryo(Kryo kryo, Object plan, OutputStream out) {
+ private static void serializeObjectByKryo(Kryo kryo, Object plan, OutputStream out) {
Output output = new Output(out);
kryo.setClassLoader(Utilities.getSessionSpecifiedClassLoader());
kryo.writeObject(output, plan);
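With the llap branch's external caller gone, serializeObjectByKryo can return to private. For reference, a standalone Kryo round trip equivalent to what the helper does (assuming a Kryo version that, like Hive's copy at the time, does not require class registration; the real method additionally sets the session classloader):

  import com.esotericsoftware.kryo.Kryo;
  import com.esotericsoftware.kryo.io.Input;
  import com.esotericsoftware.kryo.io.Output;
  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.util.ArrayList;

  public class KryoSketch {
    public static void main(String[] args) {
      Kryo kryo = new Kryo();
      ByteArrayOutputStream bos = new ByteArrayOutputStream();
      Output output = new Output(bos);
      ArrayList<String> plan = new ArrayList<>();
      plan.add("stage-1");
      kryo.writeObject(output, plan);   // the same call the private helper makes
      output.close();

      Input input = new Input(new ByteArrayInputStream(bos.toByteArray()));
      ArrayList<?> copy = kryo.readObject(input, ArrayList.class);
      input.close();
      System.out.println(copy);         // [stage-1]
    }
  }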
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
index b16368f..4e6272f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
@@ -78,17 +78,16 @@ public class HiveSplitGenerator extends InputInitializer {
private static final Logger LOG = LoggerFactory.getLogger(HiveSplitGenerator.class);
- private DynamicPartitionPruner pruner = null;
- private Configuration conf = null;
- private JobConf jobConf = null;
- private MRInputUserPayloadProto userPayloadProto = null;
- private MapWork work = null;
+ private final DynamicPartitionPruner pruner;
+ private final Configuration conf;
+ private final JobConf jobConf;
+ private final MRInputUserPayloadProto userPayloadProto;
+ private final MapWork work;
private final SplitGrouper splitGrouper = new SplitGrouper();
- private SplitLocationProvider splitLocationProvider = null;
+ private final SplitLocationProvider splitLocationProvider;
-
- // TODO RSHACK This entire method needs to be reworked. Put back final fields, separate into reusable components etc.
- public void initializeSplitGenerator(Configuration conf, MapWork work) throws IOException {
+ public HiveSplitGenerator(Configuration conf, MapWork work) throws IOException {
+ super(null);
this.conf = conf;
this.work = work;
@@ -103,8 +102,6 @@ public class HiveSplitGenerator extends InputInitializer {
// Read all credentials into the credentials instance stored in JobConf.
ShimLoader.getHadoopShims().getMergedCredentials(jobConf);
- this.work = Utilities.getMapWork(jobConf);
-
// Events can start coming in the moment the InputInitializer is created. The pruner
// must be set up and initialized here so that it sets up its structures to start accepting events.
// Setting it up in initialize leads to a window where events may come in before the pruner is
@@ -116,9 +113,7 @@ public class HiveSplitGenerator extends InputInitializer {
public HiveSplitGenerator(InputInitializerContext initializerContext) throws IOException,
SerDeException {
super(initializerContext);
- if (initializerContext == null) {
- return;
- }
+
Preconditions.checkNotNull(initializerContext);
userPayloadProto =
MRInputHelpers.parseMRInputPayload(initializerContext.getInputUserPayload());
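The rework above replaces the initializeSplitGenerator() method (and its RSHACK TODO) with a real constructor, which is what lets the fields become final again: final fields must be assigned on every constructor path, something an after-the-fact init method cannot guarantee. A compact illustration of the two construction paths, using stand-in types rather than the real Hive/Tez classes:

  public class SplitGeneratorShape {
    static class Conf {}
    static class MapWork {}
    static class InitializerContext { Conf getConf() { return new Conf(); } }

    private final Conf conf;
    private final MapWork work;

    // External-client path (used by GenericUDTFGetSplits below): caller supplies both.
    SplitGeneratorShape(Conf conf, MapWork work) {
      this.conf = conf;
      this.work = work;
    }

    // In-Tez path: everything is derived from the initializer context.
    SplitGeneratorShape(InitializerContext ctx) {
      if (ctx == null) {
        throw new NullPointerException("ctx"); // Preconditions.checkNotNull in the diff
      }
      this.conf = ctx.getConf();
      this.work = new MapWork(); // stands in for parsing the input payload
    }
  }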
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index ca8dccf..b5ceb14 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
@@ -141,31 +140,35 @@ public class SimpleFetchOptimizer extends Transform {
}
private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
- boolean result = false;
-
if (limit > 0) {
if (data.hasOnlyPruningFilter()) {
/* partitioned table + query has only pruning filters */
- result = true;
+ return true;
} else if (data.isPartitioned() == false && data.isFiltered() == false) {
/* unpartitioned table + no filters */
- result = true;
+ return true;
}
/* fall through */
- } else {
- long threshold = HiveConf.getLongVar(pctx.getConf(),
- HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
- if (threshold < 0) {
- result = true;
- } else {
- long remaining = threshold;
- remaining -= data.getInputLength(pctx, remaining);
- if (remaining >= 0) {
- result = true;
- }
+ }
+ long threshold = HiveConf.getLongVar(pctx.getConf(),
+ HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
+ if (threshold < 0) {
+ return true;
+ }
+ Operator child = data.scanOp.getChildOperators().get(0);
+ if(child instanceof SelectOperator) {
+ // select *, constants and casts can be allowed without a threshold check
+ if (checkExpressions((SelectOperator)child)) {
+ return true;
}
}
- return result;
+ long remaining = threshold;
+ remaining -= data.getInputLength(pctx, remaining);
+ if (remaining < 0) {
+ LOG.info("Threshold " + remaining + " exceeded for pseudoMR mode");
+ return false;
+ }
+ return true;
}
// all we can handle is LimitOperator, FilterOperator, SelectOperator and a final FS
@@ -184,20 +187,23 @@ public class SimpleFetchOptimizer extends Transform {
return null;
}
Table table = ts.getConf().getTableMetadata();
+ if (table == null) {
+ return null;
+ }
ReadEntity parent = PlanUtils.getParentViewInfo(alias, pctx.getViewAliasToInput());
- if (table != null && !table.isPartitioned()) {
+ if (!table.isPartitioned()) {
FetchData fetch = new FetchData(ts, parent, table, splitSample);
return checkOperators(fetch, aggressive, false);
}
boolean bypassFilter = false;
- if (table != null && HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
+ if (HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVEOPTPPD)) {
ExprNodeDesc pruner = pctx.getOpToPartPruner().get(ts);
if (PartitionPruner.onlyContainsPartnCols(table, pruner)) {
bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions();
}
}
- if (table != null && !aggressive && !bypassFilter) {
+ if (!aggressive && !bypassFilter) {
return null;
}
PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts);
@@ -225,7 +231,7 @@ public class SimpleFetchOptimizer extends Transform {
continue;
}
- if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter)) || op instanceof UDTFOperator) {
+ if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) {
break;
}
@@ -283,7 +289,7 @@ public class SimpleFetchOptimizer extends Transform {
private boolean isConvertible(FetchData fetch, Operator<?> operator, Set<Operator<?>> traversed) {
if (operator instanceof ReduceSinkOperator || operator instanceof CommonJoinOperator
- || operator instanceof ScriptOperator) {
+ || operator instanceof ScriptOperator || operator instanceof UDTFOperator) {
return false;
}
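Moving the UDTFOperator test from checkOperators into isConvertible (above) changes its scope: instead of only rejecting a UDTF sitting directly above the table scan, it now vetoes fetch conversion when a UDTF appears anywhere in the traversed operator subtree, alongside ReduceSink, CommonJoin and Script operators. A runnable sketch of that recursive veto walk, with stand-in operator types:

  import java.util.ArrayList;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Set;

  public class ConvertibleSketch {
    static class Op { final List<Op> children = new ArrayList<>(); }
    static class UdtfOp extends Op {}

    static boolean isConvertible(Op op, Set<Op> traversed) {
      if (!traversed.add(op)) {
        return true;              // already checked via another path
      }
      if (op instanceof UdtfOp) { // the check the diff relocates here
        return false;
      }
      for (Op child : op.children) {
        if (!isConvertible(child, traversed)) {
          return false;
        }
      }
      return true;
    }

    public static void main(String[] args) {
      Op scan = new Op();
      Op select = new Op();
      select.children.add(new UdtfOp());
      scan.children.add(select);
      System.out.println(isConvertible(scan, new HashSet<>())); // false
    }
  }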
http://git-wip-us.apache.org/repos/asf/hive/blob/53b43cd4/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 9a52c7d..2d36e5c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -298,15 +298,13 @@ public class GenericUDTFGetSplits extends GenericUDTF {
// we have the dag now proceed to get the splits:
- HiveSplitGenerator splitGenerator = new HiveSplitGenerator(null);
Preconditions.checkState(HiveConf.getBoolVar(wxConf,
HiveConf.ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS));
Preconditions.checkState(HiveConf.getBoolVar(wxConf,
HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS));
- splitGenerator.initializeSplitGenerator(wxConf, mapWork);
+ HiveSplitGenerator splitGenerator = new HiveSplitGenerator(wxConf, mapWork);
List<Event> eventList = splitGenerator.initialize();
- // hack - just serializing with kryo for now. This needs to be done properly
InputSplit[] result = new InputSplit[eventList.size() - 1];
DataOutputBuffer dob = new DataOutputBuffer();
@@ -458,7 +456,7 @@ public class GenericUDTFGetSplits extends GenericUDTF {
break;
case VARCHAR:
VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) typeInfo;
- typeDesc = new TypeDesc(TypeDesc.Type.CHAR, varcharTypeInfo.getLength());
+ typeDesc = new TypeDesc(TypeDesc.Type.VARCHAR, varcharTypeInfo.getLength());
break;
case DATE:
typeDesc = new TypeDesc(TypeDesc.Type.DATE);