Posted to commits@hive.apache.org by se...@apache.org on 2017/02/24 01:08:33 UTC
[01/50] [abbrv] hive git commit: HIVE-15161: migrate ColumnStats to use jackson (Zoltan Haindrich, via Pengcheng Xiong)
Repository: hive
Updated Branches:
refs/heads/hive-14535 74d93333c -> 2014ece97
HIVE-15161: migrate ColumnStats to use jackson (Zoltan Haindrich, via Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6e652a3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6e652a3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6e652a3b
Branch: refs/heads/hive-14535
Commit: 6e652a3b990bc53e61970ddc1aa2c0b503cd13be
Parents: e732aa2
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Feb 16 10:13:56 2017 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Feb 16 10:13:56 2017 -0800
----------------------------------------------------------------------
.../hadoop/hive/common/StatsSetupConst.java | 223 ++++++++++---------
.../hadoop/hive/common/TestStatsSetupConst.java | 54 +++++
2 files changed, 171 insertions(+), 106 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6e652a3b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index c78f005..926b4a6 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -17,19 +17,31 @@
*/
package org.apache.hadoop.hive.common;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-import java.util.LinkedHashMap;
+import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
/**
* A class that defines the constant strings used by the statistics implementation.
@@ -144,35 +156,62 @@ public class StatsSetupConst {
public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
+ private static class ColumnStatsAccurate {
+ private static ObjectReader objectReader;
+ private static ObjectWriter objectWriter;
+
+ static {
+ ObjectMapper objectMapper = new ObjectMapper();
+ objectReader = objectMapper.readerFor(ColumnStatsAccurate.class);
+ objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class);
+ }
+
+ static class BooleanSerializer extends JsonSerializer<Boolean> {
+
+ @Override
+ public void serialize(Boolean value, JsonGenerator jsonGenerator,
+ SerializerProvider serializerProvider) throws IOException, JsonProcessingException {
+ jsonGenerator.writeString(value.toString());
+ }
+ }
+
+ static class BooleanDeserializer extends JsonDeserializer<Boolean> {
+
+ public Boolean deserialize(JsonParser jsonParser,
+ DeserializationContext deserializationContext)
+ throws IOException, JsonProcessingException {
+ return Boolean.valueOf(jsonParser.getValueAsString());
+ }
+ }
+
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+ @JsonSerialize(using = BooleanSerializer.class)
+ @JsonDeserialize(using = BooleanDeserializer.class)
+ @JsonProperty(BASIC_STATS)
+ boolean basicStats;
+
+ @JsonInclude(JsonInclude.Include.NON_EMPTY)
+ @JsonProperty(COLUMN_STATS)
+ @JsonSerialize(contentUsing = BooleanSerializer.class)
+ @JsonDeserialize(contentUsing = BooleanDeserializer.class)
+ TreeMap<String, Boolean> columnStats = new TreeMap<>();
+
+ };
+
public static boolean areBasicStatsUptoDate(Map<String, String> params) {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
- if (stats != null && stats.has(BASIC_STATS)) {
- return true;
- } else {
+ if (params == null) {
return false;
}
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ return stats.basicStats;
}
public static boolean areColumnStatsUptoDate(Map<String, String> params, String colName) {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
- try {
- if (!stats.has(COLUMN_STATS)) {
- return false;
- } else {
- JSONObject columns = stats.getJSONObject(COLUMN_STATS);
- if (columns != null && columns.has(colName)) {
- return true;
- } else {
- return false;
- }
- }
- } catch (JSONException e) {
- // For backward compatibility, if previous value can not be parsed to a
- // json object, it will come here.
- LOG.debug("In StatsSetupConst, JsonParser can not parse COLUMN_STATS.");
+ if (params == null) {
return false;
}
-
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ return stats.columnStats.containsKey(colName);
}
// It will only throw JSONException when stats.put(BASIC_STATS, TRUE)
@@ -180,79 +219,67 @@ public class StatsSetupConst {
// note that set basic stats false will wipe out column stats too.
public static void setBasicStatsState(Map<String, String> params, String setting) {
if (setting.equals(FALSE)) {
- if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) {
+ if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) {
params.remove(COLUMN_STATS_ACCURATE);
}
- } else {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-
- try {
- stats.put(BASIC_STATS, TRUE);
- } catch (JSONException e) {
- // impossible to throw any json exceptions.
- LOG.trace(e.getMessage());
- }
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
+ return;
+ }
+ if (params == null) {
+ throw new RuntimeException("params are null...cant set columnstatstate!");
+ }
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ stats.basicStats = true;
+ try {
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException("can't serialize column stats", e);
}
}
public static void setColumnStatsState(Map<String, String> params, List<String> colNames) {
- try {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ if (params == null) {
+ throw new RuntimeException("params are null...cant set columnstatstate!");
+ }
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
- JSONObject colStats;
- if (stats.has(COLUMN_STATS)) {
- colStats = stats.getJSONObject(COLUMN_STATS);
- } else {
- colStats = new JSONObject(new TreeMap<String,String>());
- }
- for (String colName : colNames) {
- if (!colStats.has(colName)) {
- colStats.put(colName, TRUE);
- }
+ for (String colName : colNames) {
+ if (!stats.columnStats.containsKey(colName)) {
+ stats.columnStats.put(colName, true);
}
- stats.put(COLUMN_STATS, colStats);
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
- } catch (JSONException e) {
- // impossible to throw any json exceptions.
+ }
+ try {
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
LOG.trace(e.getMessage());
}
}
public static void clearColumnStatsState(Map<String, String> params) {
- String statsAcc;
- if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) {
- // statsAcc may not be jason format, which will throw exception
- JSONObject stats = parseStatsAcc(statsAcc);
-
- if (stats.has(COLUMN_STATS)) {
- stats.remove(COLUMN_STATS);
- }
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
+ if (params == null) {
+ return;
+ }
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ stats.columnStats.clear();
+
+ try {
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
+ LOG.trace(e.getMessage());
}
}
public static void removeColumnStatsState(Map<String, String> params, List<String> colNames) {
- String statsAcc;
- if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) {
- // statsAcc may not be jason format, which will throw exception
- JSONObject stats = parseStatsAcc(statsAcc);
- try {
- JSONObject colStats = stats.getJSONObject(COLUMN_STATS);
- for (String colName : colNames) {
- if (colStats.has(colName)) {
- colStats.remove(colName);
- }
- }
- if (colStats.length() != 0) {
- stats.put(COLUMN_STATS, colStats);
- } else {
- stats.remove(COLUMN_STATS);
- }
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
- } catch (JSONException e) {
- LOG.debug(e.getMessage());
+ if (params == null) {
+ return;
+ }
+ try {
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ for (String string : colNames) {
+ stats.columnStats.remove(string);
}
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
+ LOG.trace(e.getMessage());
}
}
@@ -265,34 +292,18 @@ public class StatsSetupConst {
setBasicStatsState(params, setting);
}
- private static JSONObject parseStatsAcc(String statsAcc) {
+ private static ColumnStatsAccurate parseStatsAcc(String statsAcc) {
if (statsAcc == null) {
- return new JSONObject(new LinkedHashMap<String,Object>());
- } else {
- try {
- return new JSONObject(statsAcc);
- } catch (JSONException e) {
- return statsAccUpgrade(statsAcc);
- }
+ return new ColumnStatsAccurate();
}
- }
-
- private static JSONObject statsAccUpgrade(String statsAcc) {
- JSONObject stats;
- // old format of statsAcc, e.g., TRUE or FALSE
- LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc.");
- stats = new JSONObject(new LinkedHashMap<String,Object>());
try {
- if (statsAcc.equals(TRUE)) {
- stats.put(BASIC_STATS, TRUE);
- } else {
- stats.put(BASIC_STATS, FALSE);
+ return ColumnStatsAccurate.objectReader.readValue(statsAcc);
+ } catch (Exception e) {
+ ColumnStatsAccurate ret = new ColumnStatsAccurate();
+ if (TRUE.equalsIgnoreCase(statsAcc)) {
+ ret.basicStats = true;
}
- } catch (JSONException e1) {
- // impossible to throw any json exceptions.
- LOG.trace(e1.getMessage());
+ return ret;
}
- return stats;
}
-
}
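The custom BooleanSerializer above exists purely for wire compatibility: the old org.json code wrote booleans as quoted strings, so the Jackson replacement must keep producing {"BASIC_STATS":"true"} rather than Jackson's default unquoted {"BASIC_STATS":true}. A minimal standalone sketch of the same trick (the class and field names here are illustrative, not part of the patch):

    import java.io.IOException;
    import com.fasterxml.jackson.annotation.JsonProperty;
    import com.fasterxml.jackson.core.JsonGenerator;
    import com.fasterxml.jackson.databind.JsonSerializer;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.SerializerProvider;
    import com.fasterxml.jackson.databind.annotation.JsonSerialize;

    public class QuotedBooleanDemo {
      // Serializes Boolean as the quoted strings "true"/"false", matching the
      // format the previous org.json-based implementation emitted.
      static class BooleanSerializer extends JsonSerializer<Boolean> {
        @Override
        public void serialize(Boolean value, JsonGenerator gen, SerializerProvider provider)
            throws IOException {
          gen.writeString(value.toString());
        }
      }

      static class Stats {
        @JsonProperty("BASIC_STATS")
        @JsonSerialize(using = BooleanSerializer.class)
        boolean basicStats = true;
      }

      public static void main(String[] args) throws Exception {
        // Prints {"BASIC_STATS":"true"} with the custom serializer, instead of
        // the Jackson default {"BASIC_STATS":true}.
        System.out.println(new ObjectMapper().writeValueAsString(new Stats()));
      }
    }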
http://git-wip-us.apache.org/repos/asf/hive/blob/6e652a3b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
----------------------------------------------------------------------
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
index 7a7ad42..792b862 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.common;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import java.util.HashMap;
import java.util.Map;
@@ -53,4 +54,57 @@ public class TestStatsSetupConst {
assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
}
+ @Test
+ public void testSetBasicStatsState_falseIsAbsent() {
+ Map<String, String> params=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
+ StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
+ assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ // earlier implementation have quoted boolean values...so the new implementation should preserve this
+ @Test
+ public void testStatColumnEntriesCompat() {
+ Map<String, String> params0=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo"));
+
+ assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ @Test
+ public void testColumnEntries_orderIndependence() {
+ Map<String, String> params0=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo","Bar"));
+ Map<String, String> params1=new HashMap<>();
+ StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar","Foo"));
+ StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
+
+ assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ @Test
+ public void testColumnEntries_orderIndependence2() {
+ Map<String, String> params0=new HashMap<>();
+ // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order
+ StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year"));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year","month"));
+ Map<String, String> params1=new HashMap<>();
+ StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month","year"));
+ StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
+
+ System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ // FIXME: current objective is to keep the previous outputs...but this is possibly bad..
+ @Test
+ public void testColumnEntries_areKept_whenBasicIsAbsent() {
+ Map<String, String> params=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
+ StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
+ assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
}
[35/50] [abbrv] hive git commit: HIVE-15928: Parallelization of Select queries in Druid handler (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-15928: Parallelization of Select queries in Druid handler (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8ab1889d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8ab1889d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8ab1889d
Branch: refs/heads/hive-14535
Commit: 8ab1889dd9afe958e96cc62fc973771f61cadcba
Parents: 8973d2c
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Feb 16 14:40:41 2017 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Feb 22 10:17:28 2017 +0000
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 10 +-
.../druid/io/DruidQueryBasedInputFormat.java | 124 ++++++++++++++++---
.../hadoop/hive/druid/io/HiveDruidSplit.java | 30 ++---
.../druid/serde/DruidQueryRecordReader.java | 3 +-
.../TestHiveDruidQueryBasedInputFormat.java | 21 ++--
5 files changed, 132 insertions(+), 56 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8ab1889d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7c88f4f..3777fa9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1933,12 +1933,20 @@ public class HiveConf extends Configuration {
HIVE_DRUID_COORDINATOR_DEFAULT_ADDRESS("hive.druid.coordinator.address.default", "localhost:8081",
"Address of the Druid coordinator. It is used to check the load status of newly created segments"
),
+ HIVE_DRUID_SELECT_DISTRIBUTE("hive.druid.select.distribute", true,
+ "If it is set to true, we distribute the execution of Druid Select queries. Concretely, we retrieve\n" +
+ "the result for Select queries directly from the Druid nodes containing the segments data.\n" +
+ "In particular, first we contact the Druid broker node to obtain the nodes containing the segments\n" +
+ "for the given query, and then we contact those nodes to retrieve the results for the query.\n" +
+ "If it is set to false, we do not execute the Select queries in a distributed fashion. Instead, results\n" +
+ "for those queries are returned by the Druid broker node."),
HIVE_DRUID_SELECT_THRESHOLD("hive.druid.select.threshold", 10000,
+ "Takes only effect when hive.druid.select.distribute is set to false. \n" +
"When we can split a Select query, this is the maximum number of rows that we try to retrieve\n" +
"per query. In order to do that, we obtain the estimated size for the complete result. If the\n" +
"number of records of the query results is larger than this threshold, we split the query in\n" +
"total number of rows/threshold parts across the time dimension. Note that we assume the\n" +
- "records to be split uniformly across the time dimension"),
+ "records to be split uniformly across the time dimension."),
HIVE_DRUID_NUM_HTTP_CONNECTION("hive.druid.http.numConnection", 20, "Number of connections used by\n" +
"the HTTP client."),
HIVE_DRUID_HTTP_READ_TIMEOUT("hive.druid.http.read.timeout", "PT1M", "Read timeout period for the HTTP\n" +
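The distributed path described by hive.druid.select.distribute hinges on Druid's segment-candidates endpoint: the handler first asks the broker which nodes hold the segments for the query's datasource and intervals, then builds one split per segment. A bare-bones sketch of that first request using only the JDK HTTP client; the broker address, datasource, and interval below are placeholder values:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class CandidatesProbe {
      public static void main(String[] args) throws Exception {
        String broker = "localhost:8082";            // placeholder broker address
        String dataSource = "wikipedia";             // placeholder datasource
        String intervals = "2013-01-01/2013-01-02";  // comma-separated, no brackets
        URL url = new URL(String.format(
            "http://%s/druid/v2/datasources/%s/candidates?intervals=%s",
            broker, dataSource, intervals));
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        // The broker answers with a JSON array of located segment descriptors:
        // interval, version, partition number, and the hosts serving each segment.
        try (BufferedReader in = new BufferedReader(
            new InputStreamReader(conn.getInputStream()))) {
          String line;
          while ((line = in.readLine()) != null) {
            System.out.println(line);
          }
        }
      }
    }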
http://git-wip-us.apache.org/repos/asf/hive/blob/8ab1889d/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
index 8b37840..0b35428 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidQueryBasedInputFormat.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.druid.io;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -51,6 +52,7 @@ import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.jboss.netty.handler.codec.http.HttpMethod;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.joda.time.chrono.ISOChronology;
@@ -60,23 +62,28 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
+import com.google.common.collect.Lists;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.HttpClientConfig;
import com.metamx.http.client.HttpClientInit;
+import com.metamx.http.client.Request;
import io.druid.query.BaseQuery;
import io.druid.query.Druids;
import io.druid.query.Druids.SegmentMetadataQueryBuilder;
import io.druid.query.Druids.SelectQueryBuilder;
import io.druid.query.Druids.TimeBoundaryQueryBuilder;
+import io.druid.query.LocatedSegmentDescriptor;
import io.druid.query.Query;
import io.druid.query.Result;
+import io.druid.query.SegmentDescriptor;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
+import io.druid.query.spec.MultipleSpecificSegmentSpec;
import io.druid.query.timeboundary.TimeBoundaryQuery;
import io.druid.query.timeboundary.TimeBoundaryResultValue;
@@ -143,12 +150,17 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
case Query.TIMESERIES:
case Query.TOPN:
case Query.GROUP_BY:
- return new HiveDruidSplit[] { new HiveDruidSplit(address,
- deserializeSerialize(druidQuery), paths[0]) };
+ return new HiveDruidSplit[] { new HiveDruidSplit(deserializeSerialize(druidQuery),
+ paths[0], new String[] {address}) };
case Query.SELECT:
SelectQuery selectQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(
druidQuery, SelectQuery.class);
- return splitSelectQuery(conf, address, selectQuery, paths[0]);
+ boolean distributed = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE);
+ if (distributed) {
+ return distributeSelectQuery(conf, address, selectQuery, paths[0]);
+ } else {
+ return splitSelectQuery(conf, address, selectQuery, paths[0]);
+ }
default:
throw new IOException("Druid query type not recognized");
}
@@ -166,8 +178,83 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(builder.build());
}
+ /* New method that distributes the Select query by creating splits containing
+ * information about different Druid nodes that have the data for the given
+ * query. */
+ private static HiveDruidSplit[] distributeSelectQuery(Configuration conf, String address,
+ SelectQuery query, Path dummyPath) throws IOException {
+ // If it has a limit, we use it and we do not distribute the query
+ final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
+ if (isFetch) {
+ return new HiveDruidSplit[] { new HiveDruidSplit(
+ DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
+ new String[]{address} ) };
+ }
+
+ // Properties from configuration
+ final int numConnection = HiveConf.getIntVar(conf,
+ HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
+ final Period readTimeout = new Period(
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
+
+ // Create request to obtain nodes that are holding data for the given datasource and intervals
+ final Lifecycle lifecycle = new Lifecycle();
+ final HttpClient client = HttpClientInit.createClient(
+ HttpClientConfig.builder().withNumConnections(numConnection)
+ .withReadTimeout(readTimeout.toStandardDuration()).build(), lifecycle);
+ try {
+ lifecycle.start();
+ } catch (Exception e) {
+ LOG.error("Lifecycle start issue");
+ throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+ final String intervals =
+ StringUtils.join(query.getIntervals(), ","); // Comma-separated intervals without brackets
+ final String request = String.format(
+ "http://%s/druid/v2/datasources/%s/candidates?intervals=%s",
+ address, query.getDataSource().getNames().get(0), intervals);
+ final InputStream response;
+ try {
+ response = DruidStorageHandlerUtils.submitRequest(client, new Request(HttpMethod.GET, new URL(request)));
+ } catch (Exception e) {
+ lifecycle.stop();
+ throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+
+ // Retrieve results
+ final List<LocatedSegmentDescriptor> segmentDescriptors;
+ try {
+ segmentDescriptors = DruidStorageHandlerUtils.JSON_MAPPER.readValue(response,
+ new TypeReference<List<LocatedSegmentDescriptor>>() {});
+ } catch (Exception e) {
+ response.close();
+ throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ } finally {
+ lifecycle.stop();
+ }
+
+ // Create one input split for each segment
+ final int numSplits = segmentDescriptors.size();
+ final HiveDruidSplit[] splits = new HiveDruidSplit[segmentDescriptors.size()];
+ for (int i = 0; i < numSplits; i++) {
+ final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
+ final String[] hosts = new String[locatedSD.getLocations().size()];
+ for (int j = 0; j < locatedSD.getLocations().size(); j++) {
+ hosts[j] = locatedSD.getLocations().get(j).getHost();
+ }
+ // Create partial Select query
+ final SegmentDescriptor newSD = new SegmentDescriptor(
+ locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
+ final SelectQuery partialQuery = query.withQuerySegmentSpec(
+ new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
+ splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
+ dummyPath, hosts);
+ }
+ return splits;
+ }
+
/* Method that splits Select query depending on the threshold so read can be
- * parallelized */
+ * parallelized. We will only contact the Druid broker to obtain all results. */
private static HiveDruidSplit[] splitSelectQuery(Configuration conf, String address,
SelectQuery query, Path dummyPath
) throws IOException {
@@ -182,7 +269,8 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
if (isFetch) {
// If it has a limit, we use it and we do not split the query
return new HiveDruidSplit[] { new HiveDruidSplit(
- address, DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath) };
+ DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
+ new String[] {address} ) };
}
// We do not have the number of rows, thus we need to execute a
@@ -200,7 +288,8 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
try {
lifecycle.start();
} catch (Exception e) {
- LOG.error("Lifecycle start issue", e);
+ LOG.error("Lifecycle start issue");
+ throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
InputStream response;
try {
@@ -231,7 +320,8 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
if (metadataList.isEmpty()) {
// There are no rows for that time range, we can submit query as it is
return new HiveDruidSplit[] { new HiveDruidSplit(
- address, DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath) };
+ DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
+ new String[] {address} ) };
}
if (metadataList.size() != 1) {
throw new IOException("Information about segments should have been merged");
@@ -242,9 +332,9 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
query = query.withPagingSpec(PagingSpec.newSpec(Integer.MAX_VALUE));
if (numRows <= selectThreshold) {
// We are not going to split it
- return new HiveDruidSplit[] { new HiveDruidSplit(address,
- DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath
- ) };
+ return new HiveDruidSplit[] { new HiveDruidSplit(
+ DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
+ new String[] {address} ) };
}
// If the query does not specify a timestamp, we obtain the total time using
@@ -266,12 +356,8 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
try {
lifecycle.start();
} catch (Exception e) {
- LOG.error("Lifecycle start issue", e);
- }
- try {
- lifecycle.start();
- } catch (Exception e) {
- LOG.error("Lifecycle start issue", e);
+ LOG.error("Lifecycle start issue");
+ throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
try {
response = DruidStorageHandlerUtils.submitRequest(client,
@@ -318,9 +404,9 @@ public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidW
// Create partial Select query
final SelectQuery partialQuery = query.withQuerySegmentSpec(
new MultipleIntervalSegmentSpec(newIntervals.get(i)));
- splits[i] = new HiveDruidSplit(address,
- DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath
- );
+ splits[i] = new HiveDruidSplit(
+ DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery), dummyPath,
+ new String[] {address});
}
return splits;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/8ab1889d/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
index 861075d..58cb47a 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/HiveDruidSplit.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.druid.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
@@ -29,56 +30,41 @@ import org.apache.hadoop.mapred.FileSplit;
*/
public class HiveDruidSplit extends FileSplit implements org.apache.hadoop.mapred.InputSplit {
- private String address;
-
private String druidQuery;
+ private String[] hosts;
+
// required for deserialization
public HiveDruidSplit() {
super((Path) null, 0, 0, (String[]) null);
}
- public HiveDruidSplit(String address, String druidQuery, Path dummyPath) {
- super(dummyPath, 0, 0, (String[]) null);
- this.address = address;
+ public HiveDruidSplit(String druidQuery, Path dummyPath, String hosts[]) {
+ super(dummyPath, 0, 0, hosts);
this.druidQuery = druidQuery;
+ this.hosts = hosts;
}
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
- out.writeUTF(address);
out.writeUTF(druidQuery);
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
- address = in.readUTF();
druidQuery = in.readUTF();
}
- @Override
- public long getLength() {
- return 0L;
- }
-
- @Override
- public String[] getLocations() {
- return new String[] { "" };
- }
-
- public String getAddress() {
- return address;
- }
-
public String getDruidQuery() {
return druidQuery;
}
@Override
public String toString() {
- return "HiveDruidSplit{" + address + ", " + druidQuery + "}";
+ return "HiveDruidSplit{" + druidQuery + ", "
+ + (hosts == null ? "empty hosts" : Arrays.toString(hosts)) + "}";
}
}
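Passing the segment hosts to the FileSplit superclass (instead of overriding getLocations() to return an empty string, as the old code did) lets Hadoop's scheduler use them as locality hints, so each partial Select query can run on a node that already serves the segment. A small sketch; the host name is illustrative:

    import java.util.Arrays;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    public class SplitLocalityDemo {
      public static void main(String[] args) throws Exception {
        // Hosts handed to the FileSplit constructor surface through
        // getLocations(), which the scheduler consults when placing tasks.
        FileSplit split = new FileSplit(
            new Path("/dummy"), 0, 0, new String[] { "druid-historical-1" });
        System.out.println(Arrays.toString(split.getLocations()));
        // -> [druid-historical-1]
      }
    }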
http://git-wip-us.apache.org/repos/asf/hive/blob/8ab1889d/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
index 0d5f0b1..8d099c7 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidQueryRecordReader.java
@@ -98,8 +98,7 @@ public abstract class DruidQueryRecordReader<T extends BaseQuery<R>, R extends C
InputStream response;
try {
response = DruidStorageHandlerUtils.submitRequest(client,
- DruidStorageHandlerUtils.createRequest(hiveDruidSplit.getAddress(), query)
- );
+ DruidStorageHandlerUtils.createRequest(hiveDruidSplit.getLocations()[0], query));
} catch (Exception e) {
lifecycle.stop();
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
http://git-wip-us.apache.org/repos/asf/hive/blob/8ab1889d/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java b/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java
index 9b7a1da..bb4011b 100644
--- a/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java
+++ b/druid-handler/src/test/org/apache/hadoop/hive/druid/TestHiveDruidQueryBasedInputFormat.java
@@ -143,8 +143,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ " \"descending\": \"true\", "
+ " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]}";
private static final String TIMESERIES_QUERY_SPLIT =
- "[HiveDruidSplit{localhost:8082, "
- + "{\"queryType\":\"timeseries\","
+ "[HiveDruidSplit{{\"queryType\":\"timeseries\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"sample_datasource\"},"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2012-01-01T00:00:00.000-08:00/2012-01-03T00:00:00.000-08:00\"]},"
+ "\"descending\":true,"
@@ -152,7 +151,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ "\"granularity\":{\"type\":\"duration\",\"duration\":86400000,\"origin\":\"1969-12-31T16:00:00.000-08:00\"},"
+ "\"aggregations\":[],"
+ "\"postAggregations\":[],"
- + "\"context\":null}}]";
+ + "\"context\":null}, [localhost:8082]}]";
private static final String TOPN_QUERY =
"{ \"queryType\": \"topN\", "
@@ -177,8 +176,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ " \"2013-08-31T00:00:00.000/2013-09-03T00:00:00.000\" "
+ " ]}";
private static final String TOPN_QUERY_SPLIT =
- "[HiveDruidSplit{localhost:8082, "
- + "{\"queryType\":\"topN\","
+ "[HiveDruidSplit{{\"queryType\":\"topN\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"sample_data\"},"
+ "\"dimension\":{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"sample_dim\",\"outputName\":\"sample_dim\"},"
+ "\"metric\":{\"type\":\"LegacyTopNMetricSpec\",\"metric\":\"count\"},"
@@ -190,7 +188,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ "{\"type\":\"doubleSum\",\"name\":\"some_metric\",\"fieldName\":\"some_metric\"}],"
+ "\"postAggregations\":[],"
+ "\"context\":null,"
- + "\"descending\":false}}]";
+ + "\"descending\":false}, [localhost:8082]}]";
private static final String GROUP_BY_QUERY =
"{ \"queryType\": \"groupBy\", "
@@ -208,8 +206,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]"
+ " }";
private static final String GROUP_BY_QUERY_SPLIT =
- "[HiveDruidSplit{localhost:8082, "
- + "{\"queryType\":\"groupBy\","
+ "[HiveDruidSplit{{\"queryType\":\"groupBy\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"sample_datasource\"},"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2012-01-01T00:00:00.000-08:00/2012-01-03T00:00:00.000-08:00\"]},"
+ "\"filter\":null,"
@@ -223,7 +220,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ "\"limitSpec\":{\"type\":\"default\",\"columns\":[{\"dimension\":\"country\",\"direction\":\"ascending\",\"dimensionOrder\":{\"type\":\"lexicographic\"}},"
+ "{\"dimension\":\"data_transfer\",\"direction\":\"ascending\",\"dimensionOrder\":{\"type\":\"lexicographic\"}}],\"limit\":5000},"
+ "\"context\":null,"
- + "\"descending\":false}}]";
+ + "\"descending\":false}, [localhost:8082]}]";
private static final String SELECT_QUERY =
"{ \"queryType\": \"select\", "
@@ -235,8 +232,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ " \"pagingSpec\":{\"pagingIdentifiers\": {}, \"threshold\":5}, "
+ " \"context\":{\"druid.query.fetch\":true}}";
private static final String SELECT_QUERY_SPLIT =
- "[HiveDruidSplit{localhost:8082, "
- + "{\"queryType\":\"select\","
+ "[HiveDruidSplit{{\"queryType\":\"select\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"wikipedia\"},"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2013-01-01T00:00:00.000-08:00/2013-01-02T00:00:00.000-08:00\"]},"
+ "\"descending\":false,"
@@ -252,7 +248,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"user\",\"outputName\":\"user\"}],"
+ "\"metrics\":[\"count\",\"added\",\"delta\",\"variation\",\"deleted\"],"
+ "\"pagingSpec\":{\"pagingIdentifiers\":{},\"threshold\":5,\"fromNext\":false},"
- + "\"context\":{\"druid.query.fetch\":true}}}]";
+ + "\"context\":{\"druid.query.fetch\":true}}, [localhost:8082]}]";
@Test
public void testTimeZone() throws Exception {
@@ -289,6 +285,7 @@ public class TestHiveDruidQueryBasedInputFormat extends TestCase {
conf.set(Constants.DRUID_DATA_SOURCE, dataSource);
conf.set(Constants.DRUID_QUERY_JSON, jsonQuery);
conf.set(Constants.DRUID_QUERY_TYPE, queryType);
+ conf.setBoolean(HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE.varname, false);
return conf;
}
[10/50] [abbrv] hive git commit: HIVE-15957 : Follow Hive's rules for type inference instead of Calcite (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Posted by se...@apache.org.
HIVE-15957 : Follow Hive's rules for type inference instead of Calcite (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6f6a5586
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6f6a5586
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6f6a5586
Branch: refs/heads/hive-14535
Commit: 6f6a5586ef4a05903e81e865914cd3ab9b2e0555
Parents: 1677ed9
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Feb 16 17:05:08 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Feb 17 10:51:59 2017 -0800
----------------------------------------------------------------------
.../translator/SqlFunctionConverter.java | 33 +++++++++++-------
.../clientpositive/interval_arithmetic.q | 3 ++
.../results/clientpositive/interval_alt.q.out | 6 ++--
.../clientpositive/interval_arithmetic.q.out | 35 ++++++++++++++++++++
.../llap/metadata_only_queries.q.out | 4 +--
.../clientpositive/metadata_only_queries.q.out | 4 +--
.../spark/metadata_only_queries.q.out | 4 +--
7 files changed, 68 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 5c85dce..85450c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -329,6 +329,7 @@ public class SqlFunctionConverter {
StaticBlockBuilder() {
registerFunction("+", SqlStdOperatorTable.PLUS, hToken(HiveParser.PLUS, "+"));
+ registerFunction("-", SqlStdOperatorTable.MINUS, hToken(HiveParser.MINUS, "-"));
registerFunction("*", SqlStdOperatorTable.MULTIPLY, hToken(HiveParser.STAR, "*"));
registerFunction("/", SqlStdOperatorTable.DIVIDE, hToken(HiveParser.DIVIDE, "/"));
registerFunction("%", SqlStdOperatorTable.MOD, hToken(HiveParser.Identifier, "%"));
@@ -482,21 +483,29 @@ public class SqlFunctionConverter {
// this.So, bail out for now.
throw new CalciteSemanticException("<=> is not yet supported for cbo.", UnsupportedFeature.Less_than_equal_greater_than);
}
- SqlOperator calciteOp = hiveToCalcite.get(hiveUdfName);
- if (calciteOp == null) {
- CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
- if ("-".equals(hiveUdfName)) {
- // Calcite native - has broken inference for return type, so we override it with explicit return type
- // e.g. timestamp - timestamp is inferred as timestamp, where it really should be interval.
+ SqlOperator calciteOp;
+ CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
+ switch (hiveUdfName) {
+ // Follow hive's rules for type inference as oppose to Calcite's
+ // for return type.
+ //TODO: Perhaps we should do this for all functions, not just +,-
+ case "-":
calciteOp = new SqlMonotonicBinaryOperator("-", SqlKind.MINUS, 40, true,
uInf.returnTypeInference, uInf.operandTypeInference, OperandTypes.MINUS_OPERATOR);
- } else {
- calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference,
- uInf.operandTypeInference, uInf.operandTypeChecker,
- SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic);
- }
+ break;
+ case "+":
+ calciteOp = new SqlMonotonicBinaryOperator("+", SqlKind.PLUS, 40, true,
+ uInf.returnTypeInference, uInf.operandTypeInference, OperandTypes.PLUS_OPERATOR);
+ break;
+ default:
+ calciteOp = hiveToCalcite.get(hiveUdfName);
+ if (null == calciteOp) {
+ calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference,
+ uInf.operandTypeInference, uInf.operandTypeChecker,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic);
+ }
+ break;
}
-
return calciteOp;
}
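For "+" and "-" the patch bypasses Calcite's built-in operators, whose return-type inference disagrees with Hive's (e.g. timestamp - timestamp should yield an interval, not a timestamp), and instead builds SqlMonotonicBinaryOperator instances wired to Hive's own inference. A stripped-down sketch of that construction, assuming Calcite on the classpath; the explicit TIMESTAMP return type is only a stand-in for the uInf.returnTypeInference used in the patch:

    import org.apache.calcite.sql.SqlKind;
    import org.apache.calcite.sql.SqlOperator;
    import org.apache.calcite.sql.fun.SqlMonotonicBinaryOperator;
    import org.apache.calcite.sql.type.InferTypes;
    import org.apache.calcite.sql.type.OperandTypes;
    import org.apache.calcite.sql.type.ReturnTypes;
    import org.apache.calcite.sql.type.SqlTypeName;

    public class HivePlusOperator {
      // A "+" operator with precedence 40 whose return type comes from the
      // supplied inference rather than Calcite's default PLUS inference.
      static final SqlOperator HIVE_PLUS = new SqlMonotonicBinaryOperator(
          "+", SqlKind.PLUS, 40, true,
          ReturnTypes.explicit(SqlTypeName.TIMESTAMP), // stand-in for Hive's rules
          InferTypes.FIRST_KNOWN,
          OperandTypes.PLUS_OPERATOR);
    }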
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/test/queries/clientpositive/interval_arithmetic.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/interval_arithmetic.q b/ql/src/test/queries/clientpositive/interval_arithmetic.q
index 06acbd7..445cdfe 100644
--- a/ql/src/test/queries/clientpositive/interval_arithmetic.q
+++ b/ql/src/test/queries/clientpositive/interval_arithmetic.q
@@ -159,4 +159,7 @@ select
from interval_arithmetic_1
limit 2;
+explain
+select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1;
+select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1;
drop table interval_arithmetic_1;
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/test/results/clientpositive/interval_alt.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/interval_alt.q.out b/ql/src/test/results/clientpositive/interval_alt.q.out
index 9884ec3..eba8420 100644
--- a/ql/src/test/results/clientpositive/interval_alt.q.out
+++ b/ql/src/test/results/clientpositive/interval_alt.q.out
@@ -137,7 +137,7 @@ STAGE PLANS:
alias: t
Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: (2012-01-01 + IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), (2012-01-01 - IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), 2012-01-04 (type: date), (2012-01-01 + IntervalYearMonthLiteralProcessor(concat(dt, '-1'))) (type: date)
+ expressions: (2012-01-01 + IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), (2012-01-01 - IntervalDayLiteralProcessor(((- dt) * dt))) (type: timestamp), 2012-01-04 00:00:00.0 (type: timestamp), (2012-01-01 + IntervalYearMonthLiteralProcessor(concat(dt, '-1'))) (type: date)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -172,5 +172,5 @@ POSTHOOK: query: select
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
-2011-12-31 00:00:00 2012-01-02 00:00:00 2012-01-04 2013-02-01
-2011-12-28 00:00:00 2012-01-05 00:00:00 2012-01-04 2014-02-01
+2011-12-31 00:00:00 2012-01-02 00:00:00 2012-01-04 00:00:00 2013-02-01
+2011-12-28 00:00:00 2012-01-05 00:00:00 2012-01-04 00:00:00 2014-02-01
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/test/results/clientpositive/interval_arithmetic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/interval_arithmetic.q.out b/ql/src/test/results/clientpositive/interval_arithmetic.q.out
index 64882f8..c1fc738 100644
--- a/ql/src/test/results/clientpositive/interval_arithmetic.q.out
+++ b/ql/src/test/results/clientpositive/interval_arithmetic.q.out
@@ -606,6 +606,41 @@ POSTHOOK: Input: default@interval_arithmetic_1
#### A masked pattern was here ####
109 20:30:40.246913578 89 02:14:26.000000000
109 20:30:40.246913578 89 02:14:26.000000000
+PREHOOK: query: explain
+select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ TableScan
+ alias: interval_arithmetic_1
+ Statistics: Num rows: 12288 Data size: 326837 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2017-02-19 03:04:00.0 (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@interval_arithmetic_1
+#### A masked pattern was here ####
+POSTHOOK: query: select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@interval_arithmetic_1
+#### A masked pattern was here ####
+2017-02-19 03:04:00
PREHOOK: query: drop table interval_arithmetic_1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@interval_arithmetic_1
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
index 25be543..c8190bd 100644
--- a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out
@@ -343,7 +343,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -407,7 +407,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/test/results/clientpositive/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
index 2e3331e..57b59dd 100644
--- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out
@@ -313,7 +313,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -367,7 +367,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/6f6a5586/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
index dc96a0d..543d0ef 100644
--- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
+++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out
@@ -331,7 +331,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -391,7 +391,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(11,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
+ expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
File Output Operator
[41/50] [abbrv] hive git commit: HIVE-15955: make explain formatted to include opId and etc (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-15955: make explain formatted to include opId and etc (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/759766ee
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/759766ee
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/759766ee
Branch: refs/heads/hive-14535
Commit: 759766eeb5d8047d4acdbd2faca755985bb5a39f
Parents: 89310fe
Author: Pengcheng Xiong <px...@apache.org>
Authored: Wed Feb 22 20:10:19 2017 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Wed Feb 22 20:10:19 2017 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 10 ++-
.../AnnotateReduceSinkOutputOperator.java | 73 ++++++++++++++++++++
.../hadoop/hive/ql/optimizer/Optimizer.java | 4 ++
.../hadoop/hive/ql/plan/ReduceSinkDesc.java | 14 ++++
.../clientpositive/explain_formatted_oid.q | 18 +++++
.../clientpositive/explain_formatted_oid.q.out | 38 ++++++++++
ql/src/test/results/clientpositive/input4.q.out | 2 +-
ql/src/test/results/clientpositive/join0.q.out | 2 +-
.../results/clientpositive/parallel_join0.q.out | 2 +-
.../test/results/clientpositive/plan_json.q.out | 2 +-
.../clientpositive/vector_outer_join3.q.out | 6 +-
.../clientpositive/vector_outer_join4.q.out | 6 +-
.../clientpositive/vector_outer_join6.q.out | 4 +-
13 files changed, 168 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 74cec3e..086ccb2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -115,6 +115,7 @@ import org.slf4j.LoggerFactory;
public class ExplainTask extends Task<ExplainWork> implements Serializable {
private static final long serialVersionUID = 1L;
public static final String EXPL_COLUMN_NAME = "Explain";
+ public static final String OUTPUT_OPERATORS = "OutputOperators:";
private final Set<Operator<?>> visitedOps = new HashSet<Operator<?>>();
private boolean isLogical = false;
protected final Logger LOG;
@@ -790,10 +791,17 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
String appender = isLogical ? " (" + operator.getOperatorId() + ")" : "";
JSONObject jsonOut = outputPlan(operator.getConf(), out, extended,
jsonOutput, jsonOutput ? 0 : indent, appender);
- if (this.work != null && this.work.isUserLevelExplain()) {
+ if (this.work != null && (this.work.isUserLevelExplain() || this.work.isFormatted())) {
if (jsonOut != null && jsonOut.length() > 0) {
((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put("OperatorId:",
operator.getOperatorId());
+ if (!this.work.isUserLevelExplain() && this.work.isFormatted()
+ && operator instanceof ReduceSinkOperator) {
+ ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put(
+ OUTPUT_OPERATORS,
+ Arrays.toString(((ReduceSinkOperator) operator).getConf().getOutputOperators()
+ .toArray()));
+ }
}
}
if (jsonOutput) {
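The effect of this hunk is that every node in an EXPLAIN FORMATTED plan now carries an "OperatorId:" key, and ReduceSinkOperator nodes additionally carry "OutputOperators:". A minimal sketch of a consumer, assuming org.json on the classpath (the same JSON library ExplainTask uses) and a plan string captured from EXPLAIN FORMATTED; PlanIds and collectIds are hypothetical names, not part of this patch:

  import java.util.ArrayList;
  import java.util.List;
  import org.json.JSONArray;
  import org.json.JSONObject;

  public class PlanIds {
    // Walk the nested plan JSON and collect every "OperatorId:" value.
    static void collectIds(Object node, List<String> ids) {
      if (node instanceof JSONObject) {
        JSONObject obj = (JSONObject) node;
        if (obj.has("OperatorId:")) {
          ids.add(obj.getString("OperatorId:"));
        }
        String[] names = JSONObject.getNames(obj);  // null for an empty object
        if (names != null) {
          for (String key : names) {
            collectIds(obj.get(key), ids);
          }
        }
      } else if (node instanceof JSONArray) {
        JSONArray arr = (JSONArray) node;
        for (int i = 0; i < arr.length(); i++) {
          collectIds(arr.get(i), ids);
        }
      }
    }

    public static void main(String[] args) {
      List<String> ids = new ArrayList<>();
      collectIds(new JSONObject(args[0]), ids);  // args[0]: the EXPLAIN FORMATTED output
      System.out.println(ids);                   // e.g. [TS_0, FIL_17, SEL_2, RS_9, ...]
    }
  }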
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateReduceSinkOutputOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateReduceSinkOutputOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateReduceSinkOutputOperator.java
new file mode 100644
index 0000000..0b61f4b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AnnotateReduceSinkOutputOperator.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Implementation of AnnotateReduceSinkOutputOperator optimization step.
+ */
+public class AnnotateReduceSinkOutputOperator extends Transform {
+ @Override
+ public ParseContext transform(ParseContext pctx) throws SemanticException {
+
+ // 1. We apply the transformation
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ opRules.put(new RuleRegExp("R1",
+ "(" + ReduceSinkOperator.getOperatorName() + "%)"), new ReduceSinkOutputOperatorAnnotator());
+ GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null));
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pctx.getTopOps().values());
+ ogw.startWalking(topNodes, null);
+ return pctx;
+ }
+
+ private static class ReduceSinkOutputOperatorAnnotator implements NodeProcessor {
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ ReduceSinkOperator rs = (ReduceSinkOperator) nd;
+ List<Operator<? extends OperatorDesc>> children = rs.getChildOperators();
+ List<String> outputOperators = new ArrayList<>();
+ for (Operator<? extends OperatorDesc> operator : children) {
+ outputOperators.add(operator.getOperatorId());
+ }
+ rs.getConf().setOutputOperators(outputOperators);
+ return null;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index c6287e4..a3a19f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -232,6 +232,10 @@ public class Optimizer {
transformations.add(new SimpleFetchAggregation());
}
+ if (pctx.getContext().getExplainConfig() != null
+ && pctx.getContext().getExplainConfig().isFormatted()) {
+ transformations.add(new AnnotateReduceSinkOutputOperator());
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
index b8c2d42..d77a223 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
@@ -80,6 +80,12 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
private String outputName;
/**
+ * Holds the names of the output operators
+ * that this reduce sink is outputting to.
+ */
+ private List<String> outputOperators;
+
+ /**
* The partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language).
* Partition columns decide the reducer that the current row goes to.
* Partition columns are not passed to reducer.
@@ -587,4 +593,12 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
}
return new ReduceSinkOperatorExplainVectorization(this, vectorDesc);
}
+
+ public List<String> getOutputOperators() {
+ return outputOperators;
+ }
+
+ public void setOutputOperators(List<String> outputOperators) {
+ this.outputOperators = outputOperators;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/queries/clientpositive/explain_formatted_oid.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explain_formatted_oid.q b/ql/src/test/queries/clientpositive/explain_formatted_oid.q
new file mode 100644
index 0000000..932f119
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explain_formatted_oid.q
@@ -0,0 +1,18 @@
+set hive.auto.convert.join=false;
+
+create table srcTable (key string, value string);
+
+explain formatted
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN srcTable z ON (x.value = z.value);
+
+explain formatted
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN (select * from srcTable union select * from srcTable)z ON (x.value = z.value)
+union
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN (select * from srcTable union select * from srcTable)z ON (x.value = z.value);
+
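A new query file like this one is typically exercised through the standard qfile test driver; a hedged example invocation, run from the itests/qtest module (module layout and driver name can vary by branch):

  mvn test -Dtest=TestCliDriver -Dqfile=explain_formatted_oid.q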
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/explain_formatted_oid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/explain_formatted_oid.q.out b/ql/src/test/results/clientpositive/explain_formatted_oid.q.out
new file mode 100644
index 0000000..46c2090
--- /dev/null
+++ b/ql/src/test/results/clientpositive/explain_formatted_oid.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: create table srcTable (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcTable
+POSTHOOK: query: create table srcTable (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcTable
+PREHOOK: query: explain formatted
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN srcTable z ON (x.value = z.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain formatted
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN srcTable z ON (x.value = z.value)
+POSTHOOK: type: QUERY
+{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"x","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(key is not null and value is not null) (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_17","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NON
E","value expressions:":"_col1 (type: string)","OperatorId:":"RS_9","OutputOperators:":"[JOIN_11]"}}}}}}}},{"TableScan":{"alias:":"y","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"key is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_18","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","value expressions:":"_col1 (type: string)","OperatorId:":"RS_10","OutputOperators:":"[JOIN_11]"}}}}}}}}],"Reduce
Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"JOIN_11","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_20"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"OperatorId:":"TS_21","children":{"Reduce Output Operator":{"key expressions:":"_col1 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col1 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","value expressions:":"_col0 (type: string), _col3 (type: string)","OperatorId
:":"RS_12","OutputOperators:":"[JOIN_14]"}}}},{"TableScan":{"alias:":"z","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_6","children":{"Filter Operator":{"predicate:":"value is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_19","children":{"Select Operator":{"expressions:":"value (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_8","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_13","OutputOperators:":"[JOIN_14]"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type:
string)"},"outputColumnNames:":["_col0","_col3","_col4"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"JOIN_14","children":{"Select Operator":{"expressions:":"_col0 (type: string), _col4 (type: string), _col3 (type: string)","outputColumnNames:":["_col0","_col1","_col2"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_16"}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_22"}}}}}}
+PREHOOK: query: explain formatted
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN (select * from srcTable union select * from srcTable)z ON (x.value = z.value)
+union
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN (select * from srcTable union select * from srcTable)z ON (x.value = z.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain formatted
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN (select * from srcTable union select * from srcTable)z ON (x.value = z.value)
+union
+SELECT x.key, z.value, y.value
+FROM srcTable x JOIN srcTable y ON (x.key = y.key)
+JOIN (select * from srcTable union select * from srcTable)z ON (x.value = z.value)
+POSTHOOK: type: QUERY
+{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1, Stage-5"},"Stage-3":{"DEPENDENT STAGES":"Stage-2, Stage-8"},"Stage-5":{"ROOT STAGE":"TRUE"},"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-8":{"DEPENDENT STAGES":"Stage-7, Stage-10"},"Stage-10":{"ROOT STAGE":"TRUE"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"x","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(key is not null and value is not null) (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_60","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_2","children":{"Reduce Outp
ut Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","value expressions:":"_col1 (type: string)","OperatorId:":"RS_19","OutputOperators:":"[JOIN_21]"}}}}}}}},{"TableScan":{"alias:":"y","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"key is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_61","children":{"Select Operator":{"expressions:":"key (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Nu
m rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_20","OutputOperators:":"[JOIN_21]"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"JOIN_21","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_68"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"OperatorId:":"TS_69","children":{"Reduce Output Operator":{"key expressions:":"_col1 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col1 (type: string)","Statistics:":"Num rows: 1 Data
size: 0 Basic stats: PARTIAL Column stats: NONE","value expressions:":"_col0 (type: string)","OperatorId:":"RS_22","OutputOperators:":"[JOIN_24]"}}}},{"TableScan":{"OperatorId:":"TS_73","children":{"Reduce Output Operator":{"key expressions:":"_col1 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col1 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_23","OutputOperators:":"[JOIN_24]"}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col1 (type: string)"},"outputColumnNames:":["_col0","_col4"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"JOIN_24","children":{"Select Operator":{"expressions:":"_col0 (type: string), _col4 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":
"SEL_25","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_70"}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"OperatorId:":"TS_71","children":{"Union":{"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"UNION_52","children":{"Group By Operator":{"keys:":"_col0 (type: string), _col1 (type: string)","mode:":"hash","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_55","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string)","sort order:":"++","Map-reduce partition columns:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows:
2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_56","OutputOperators:":"[GBY_57]"}}}}}}}},{"TableScan":{"OperatorId:":"TS_77","children":{"Union":{"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"UNION_52","children":{"Group By Operator":{"keys:":"_col0 (type: string), _col1 (type: string)","mode:":"hash","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_55","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string)","sort order:":"++","Map-reduce partition columns:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_56","OutputOperators:":"[GBY_57]"}}}}}}}}],"Reduce Operator Tree:":{"Group By Operator":{"keys:":"KEY._col0 (type: string), KEY._col1 (type: string)","mode:":"mergepartial","out
putColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_57","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_59"}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"srctable","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_6","children":{"Filter Operator":{"predicate:":"value is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_62","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames
:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_8","children":{"Union":{"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"UNION_12","children":{"Group By Operator":{"keys:":"_col1 (type: string), _col0 (type: string)","mode:":"hash","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_15","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string)","sort order:":"++","Map-reduce partition columns:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_16","OutputOperators:":"[GBY_17]"}}}}}}}}}}}},{"TableScan":{"alias:":"srctable","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_9","children":{"Filter Ope
rator":{"predicate:":"value is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_63","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_11","children":{"Union":{"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"UNION_12","children":{"Group By Operator":{"keys:":"_col1 (type: string), _col0 (type: string)","mode:":"hash","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_15","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string)","sort order:":"++","Map-reduce partition columns:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 2 Data siz
e: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_16","OutputOperators:":"[GBY_17]"}}}}}}}}}}}}],"Reduce Operator Tree:":{"Group By Operator":{"keys:":"KEY._col0 (type: string), KEY._col1 (type: string)","mode:":"mergepartial","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_17","children":{"Select Operator":{"expressions:":"_col0 (type: string)","outputColumnNames:":["_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_18","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_72"}}}}}}}},"Stage-7":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"x","Statistics:":"Num rows:
1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_26","children":{"Filter Operator":{"predicate:":"(key is not null and value is not null) (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_64","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_28","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","value expressions:":"_col1 (type: string)","OperatorId:":"RS_45","OutputOperators:":"[JOIN_47]"}}}}}}}},{"TableScan":{"alias:":"y","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_29","children":
{"Filter Operator":{"predicate:":"key is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_65","children":{"Select Operator":{"expressions:":"key (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_31","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col0 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_46","OutputOperators:":"[JOIN_47]"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"JOIN_47","children":{"File Output Operator"
:{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_74"}}}}}},"Stage-8":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"OperatorId:":"TS_75","children":{"Reduce Output Operator":{"key expressions:":"_col1 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col1 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","value expressions:":"_col0 (type: string)","OperatorId:":"RS_48","OutputOperators:":"[JOIN_50]"}}}},{"TableScan":{"OperatorId:":"TS_79","children":{"Reduce Output Operator":{"key expressions:":"_col1 (type: string)","sort order:":"+","Map-reduce partition columns:":"_col1 (type: string)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_49","OutputOpera
tors:":"[JOIN_50]"}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col1 (type: string)"},"outputColumnNames:":["_col0","_col4"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"JOIN_50","children":{"Select Operator":{"expressions:":"_col0 (type: string), _col4 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_51","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_76"}}}}}}}},"Stage-10":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"srctable","Statistics:":"Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: NONE","OperatorId:":"TS_32","children":{"Filter Operator":{"predicate:":"value is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_66","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_34","children":{"Union":{"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"UNION_38","children":{"Group By Operator":{"keys:":"_col1 (type: string), _col0 (type: string)","mode:":"hash","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_41","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string)","sort order:":"++","Map-reduce partition columns:":"_col0
(type: string), _col1 (type: string)","Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_42","OutputOperators:":"[GBY_43]"}}}}}}}}}}}},{"TableScan":{"alias:":"srctable","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"TS_35","children":{"Filter Operator":{"predicate:":"value is not null (type: boolean)","Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"FIL_67","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_37","children":{"Union":{"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"UNION_38","children":{"Group By Operator":{"keys:":"_col1 (type: string), _col0 (type: string)","mode:":"hash","outputColumnNames:":["_col0","_
col1"],"Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_41","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string)","sort order:":"++","Map-reduce partition columns:":"_col0 (type: string), _col1 (type: string)","Statistics:":"Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"RS_42","OutputOperators:":"[GBY_43]"}}}}}}}}}}}}],"Reduce Operator Tree:":{"Group By Operator":{"keys:":"KEY._col0 (type: string), KEY._col1 (type: string)","mode:":"mergepartial","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"GBY_43","children":{"Select Operator":{"expressions:":"_col0 (type: string)","outputColumnNames:":["_col1"],"Statistics:":"Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE","OperatorId:":"SEL_44","children":{"File Output Operator":{"compressed:":"false","table:
":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_78"}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_80"}}}}}}
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/input4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/input4.q.out b/ql/src/test/results/clientpositive/input4.q.out
index 83912f6..6984318 100644
--- a/ql/src/test/results/clientpositive/input4.q.out
+++ b/ql/src/test/results/clientpositive/input4.q.out
@@ -44,7 +44,7 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN FORMATTED
SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
POSTHOOK: type: QUERY
-{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","children":{"ListSink":{}}}}}}}}}}
+{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"input4alias","Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"value (type: string), key (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}}
PREHOOK: query: SELECT Input4Alias.VALUE, Input4Alias.KEY FROM INPUT4 AS Input4Alias
PREHOOK: type: QUERY
PREHOOK: Input: default@input4
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join0.q.out b/ql/src/test/results/clientpositive/join0.q.out
index c02319e..b1e85a3 100644
--- a/ql/src/test/results/clientpositive/join0.q.out
+++ b/ql/src/test/results/clientpositive/join0.q.out
@@ -112,7 +112,7 @@ SELECT src1.key as k1, src1.value as v1,
(SELECT * FROM src WHERE src.key < 10) src2
SORT BY k1, v1, k2, v2
POSTHOOK: type: QUERY
-{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COM
PLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","
output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"
org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"FIL_13","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_6","OutputOper
ators:":"[JOIN_8]"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"FIL_14","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_7","OutputOperators:":"[JOIN_8]"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_
col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"JOIN_8","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_15"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"OperatorId:":"TS_16","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"RS_10","OutputOperators:":"[SEL_11]"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.
reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_11","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_12"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}}
Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/parallel_join0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parallel_join0.q.out b/ql/src/test/results/clientpositive/parallel_join0.q.out
index c02319e..b1e85a3 100644
--- a/ql/src/test/results/clientpositive/parallel_join0.q.out
+++ b/ql/src/test/results/clientpositive/parallel_join0.q.out
@@ -112,7 +112,7 @@ SELECT src1.key as k1, src1.value as v1,
(SELECT * FROM src WHERE src.key < 10) src2
SORT BY k1, v1, k2, v2
POSTHOOK: type: QUERY
-{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COM
PLETE Column stats: NONE","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","
output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"}}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"
org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-2":{"DEPENDENT STAGES":"Stage-1"},"Stage-0":{"DEPENDENT STAGES":"Stage-2"}},"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_0","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"FIL_13","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_2","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_6","OutputOper
ators:":"[JOIN_8]"}}}}}}}},{"TableScan":{"alias:":"src","Statistics:":"Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_3","children":{"Filter Operator":{"predicate:":"(key < 10) (type: boolean)","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"FIL_14","children":{"Select Operator":{"expressions:":"key (type: string), value (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"Reduce Output Operator":{"sort order:":"","Statistics:":"Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: string), _col1 (type: string)","OperatorId:":"RS_7","OutputOperators:":"[JOIN_8]"}}}}}}}}],"Reduce Operator Tree:":{"Join Operator":{"condition map:":[{"":"Inner Join 0 to 1"}],"keys:":{},"outputColumnNames:":["_col0","_col1","_col2","_
col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"JOIN_8","children":{"File Output Operator":{"compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe"},"OperatorId:":"FS_15"}}}}}},"Stage-2":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"OperatorId:":"TS_16","children":{"Reduce Output Operator":{"key expressions:":"_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)","sort order:":"++++","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"RS_10","OutputOperators:":"[SEL_11]"}}}}],"Reduce Operator Tree:":{"Select Operator":{"expressions:":"KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.
reducesinkkey3 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_11","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_12"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}}
Warning: Shuffle Join JOIN[8][tables = [src1, src2]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 FROM
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/plan_json.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/plan_json.q.out b/ql/src/test/results/clientpositive/plan_json.q.out
index 75d5b73..ba6d0be 100644
--- a/ql/src/test/results/clientpositive/plan_json.q.out
+++ b/ql/src/test/results/clientpositive/plan_json.q.out
@@ -2,4 +2,4 @@ PREHOOK: query: EXPLAIN FORMATTED SELECT count(1) FROM src
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN FORMATTED SELECT count(1) FROM src
POSTHOOK: type: QUERY
-{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"1","Processor Tree:":{"ListSink":{}}}}}}
+{"STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_8"}}}}}}
[44/50] [abbrv] hive git commit: HIVE-16010 : incorrect conf.set in
TezSessionPoolManager (Sergey Shelukhin, reviewed by Illya Yalovyy,
Siddharth Seth)
Posted by se...@apache.org.
HIVE-16010 : incorrect conf.set in TezSessionPoolManager (Sergey Shelukhin, reviewed by Illya Yalovyy, Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/53f03358
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/53f03358
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/53f03358
Branch: refs/heads/hive-14535
Commit: 53f03358377f3dde21f58e6c841142c6db8a9c32
Parents: 6ca79e3
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Feb 23 11:44:44 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Feb 23 11:44:44 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/53f03358/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
index ecac85c..b70dbd8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java
@@ -314,7 +314,7 @@ public class TezSessionPoolManager {
LOG.warn("User has specified " + queueName + " queue; ignoring the setting");
queueName = null;
hasQueue = false;
- conf.set("tez.queue.name", null);
+ conf.unset("tez.queue.name");
}
default: // All good.
}
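The one-line fix matters because recent Hadoop versions reject null values in Configuration.set with an IllegalArgumentException, while unset is the supported way to remove a key. A minimal sketch of the difference (UnsetDemo is a hypothetical class, not part of the patch):

  import org.apache.hadoop.conf.Configuration;

  public class UnsetDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration(false);  // no default resources
      conf.set("tez.queue.name", "default");
      conf.unset("tez.queue.name");                   // removes the key cleanly
      System.out.println(conf.get("tez.queue.name")); // prints: null
      try {
        conf.set("tez.queue.name", null);             // the old code path: rejected
      } catch (IllegalArgumentException e) {
        System.out.println("set(key, null) throws: " + e.getMessage());
      }
    }
  }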
[47/50] [abbrv] hive git commit: HIVE-16020: LLAP : Reduce IPC
connection misses (Rajesh Balamohan, Siddharth Seth,
reviewed by Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-16020: LLAP : Reduce IPC connection misses (Rajesh Balamohan, Siddharth Seth, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b8d7192f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b8d7192f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b8d7192f
Branch: refs/heads/hive-14535
Commit: b8d7192f5f28dbc832d4de3e4afc763523a4bf12
Parents: 657236e
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Fri Feb 24 04:00:20 2017 +0530
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Fri Feb 24 04:00:20 2017 +0530
----------------------------------------------------------------------
.../hive/llap/daemon/impl/AMReporter.java | 5 ++--
.../llap/daemon/impl/ContainerRunnerImpl.java | 16 ++++++----
.../hive/llap/daemon/impl/LlapDaemon.java | 8 +++--
.../hadoop/hive/llap/daemon/impl/QueryInfo.java | 29 ++++++++++++++++++
.../hive/llap/daemon/impl/QueryTracker.java | 4 ++-
.../llap/daemon/impl/TaskRunnerCallable.java | 31 +++++++++++---------
.../daemon/impl/TaskExecutorTestHelpers.java | 4 ++-
7 files changed, 72 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java
index 93237e6..a30f8b9 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/AMReporter.java
@@ -119,7 +119,8 @@ public class AMReporter extends AbstractService {
private final DaemonId daemonId;
public AMReporter(int numExecutors, int maxThreads, AtomicReference<InetSocketAddress>
- localAddress, QueryFailedHandler queryFailedHandler, Configuration conf, DaemonId daemonId) {
+ localAddress, QueryFailedHandler queryFailedHandler, Configuration conf, DaemonId daemonId,
+ SocketFactory socketFactory) {
super(AMReporter.class.getName());
this.localAddress = localAddress;
this.queryFailedHandler = queryFailedHandler;
@@ -151,7 +152,7 @@ public class AMReporter extends AbstractService {
.retryUpToMaximumTimeWithFixedSleep(retryTimeout, retrySleep,
TimeUnit.MILLISECONDS);
- this.socketFactory = NetUtils.getDefaultSocketFactory(conf);
+ this.socketFactory = socketFactory;
LOG.info("Setting up AMReporter with " +
"heartbeatInterval(ms)=" + heartbeatInterval +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
index 6908138..cc4eff0 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java
@@ -83,6 +83,8 @@ import com.google.common.base.Preconditions;
import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;
+import javax.net.SocketFactory;
+
public class ContainerRunnerImpl extends CompositeService implements ContainerRunner, FragmentCompletionHandler, QueryFailedHandler {
// TODO Setup a set of threads to process incoming requests.
@@ -107,12 +109,14 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu
private final String clusterId;
private final DaemonId daemonId;
private final UgiFactory fsUgiFactory;
+ private final SocketFactory socketFactory;
public ContainerRunnerImpl(Configuration conf, int numExecutors, int waitQueueSize,
boolean enablePreemption, String[] localDirsBase, AtomicReference<Integer> localShufflePort,
AtomicReference<InetSocketAddress> localAddress,
long totalMemoryAvailableBytes, LlapDaemonExecutorMetrics metrics,
- AMReporter amReporter, ClassLoader classLoader, DaemonId daemonId, UgiFactory fsUgiFactory) {
+ AMReporter amReporter, ClassLoader classLoader, DaemonId daemonId, UgiFactory fsUgiFactory,
+ SocketFactory socketFactory) {
super("ContainerRunnerImpl");
Preconditions.checkState(numExecutors > 0,
"Invalid number of executors: " + numExecutors + ". Must be > 0");
@@ -122,6 +126,7 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu
this.signer = UserGroupInformation.isSecurityEnabled()
? new LlapSignerImpl(conf, daemonId.getClusterString()) : null;
this.fsUgiFactory = fsUgiFactory;
+ this.socketFactory = socketFactory;
this.clusterId = daemonId.getClusterString();
this.daemonId = daemonId;
@@ -239,7 +244,8 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu
queryIdentifier, qIdProto.getApplicationIdString(), dagId,
vertex.getDagName(), vertex.getHiveQueryId(), dagIdentifier,
vertex.getVertexName(), request.getFragmentNumber(), request.getAttemptNumber(),
- vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo);
+ vertex.getUser(), vertex, jobToken, fragmentIdString, tokenInfo, request.getAmHost(),
+ request.getAmPort());
String[] localDirs = fragmentInfo.getLocalDirs();
Preconditions.checkNotNull(localDirs);
@@ -250,12 +256,12 @@ public class ContainerRunnerImpl extends CompositeService implements ContainerRu
// Used for re-localization, to add the user specified configuration (conf_pb_binary_stream)
Configuration callableConf = new Configuration(getConfig());
- UserGroupInformation taskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
+ UserGroupInformation fsTaskUgi = fsUgiFactory == null ? null : fsUgiFactory.createUgi();
TaskRunnerCallable callable = new TaskRunnerCallable(request, fragmentInfo, callableConf,
new ExecutionContextImpl(localAddress.get().getHostName()), env,
credentials, memoryPerExecutor, amReporter, confParams, metrics, killedTaskHandler,
- this, tezHadoopShim, attemptId, vertex, initialEvent, taskUgi,
- completionListener);
+ this, tezHadoopShim, attemptId, vertex, initialEvent, fsTaskUgi,
+ completionListener, socketFactory);
submissionState = executorService.schedule(callable);
if (LOG.isInfoEnabled()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
index fc9f530..eb05f4c 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
@@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import javax.management.ObjectName;
+import javax.net.SocketFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JvmPauseMonitor;
@@ -64,6 +65,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge.UdfWhitelistChecker;
import org.apache.hadoop.metrics2.util.MBeans;
+import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.ExitUtil;
@@ -105,6 +107,7 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
private final long maxJvmMemory;
private final String[] localDirs;
private final DaemonId daemonId;
+ private final SocketFactory socketFactory;
// TODO Not the best way to share the address
private final AtomicReference<InetSocketAddress> srvAddress = new AtomicReference<>(),
@@ -255,8 +258,9 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
" sessionId: " + sessionId);
int maxAmReporterThreads = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_AM_REPORTER_MAX_THREADS);
+ this.socketFactory = NetUtils.getDefaultSocketFactory(daemonConf);
this.amReporter = new AMReporter(numExecutors, maxAmReporterThreads, srvAddress,
- new QueryFailedHandlerProxy(), daemonConf, daemonId);
+ new QueryFailedHandlerProxy(), daemonConf, daemonId, socketFactory);
SecretManager sm = null;
if (UserGroupInformation.isSecurityEnabled()) {
@@ -274,7 +278,7 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
}
this.containerRunner = new ContainerRunnerImpl(daemonConf, numExecutors, waitQueueSize,
enablePreemption, localDirs, this.shufflePort, srvAddress, executorMemoryPerInstance, metrics,
- amReporter, executorClassLoader, daemonId, fsUgiFactory);
+ amReporter, executorClassLoader, daemonId, fsUgiFactory, socketFactory);
addIfService(containerRunner);
// Not adding the registry as a service, since we need to control when it is initialized - conf used to pickup properties.
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryInfo.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryInfo.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryInfo.java
index 1080d3e..eaa3e7e 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryInfo.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryInfo.java
@@ -16,6 +16,7 @@ package org.apache.hadoop.hive.llap.daemon.impl;
import java.io.File;
import java.io.IOException;
+import java.net.InetSocketAddress;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@@ -25,6 +26,7 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import com.google.common.base.Preconditions;
@@ -36,6 +38,11 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.llap.daemon.FinishableStateUpdateHandler;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateProto;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.SecurityUtil;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.tez.common.security.JobTokenIdentifier;
public class QueryInfo {
private final QueryIdentifier queryIdentifier;
@@ -57,6 +64,7 @@ public class QueryInfo {
private final FinishableStateTracker finishableStateTracker = new FinishableStateTracker();
private final String tokenUserName, appId;
+ private final AtomicReference<UserGroupInformation> umbilicalUgi;
public QueryInfo(QueryIdentifier queryIdentifier, String appIdString, String dagIdString,
String dagName, String hiveQueryIdString,
@@ -76,6 +84,7 @@ public class QueryInfo {
this.localFs = localFs;
this.tokenUserName = tokenUserName;
this.appId = tokenAppId;
+ this.umbilicalUgi = new AtomicReference<>();
}
public QueryIdentifier getQueryIdentifier() {
@@ -297,4 +306,24 @@ public class QueryInfo {
public String getTokenAppId() {
return appId;
}
+
+ public void setupUmbilicalUgi(String umbilicalUser, Token<JobTokenIdentifier> appToken, String amHost, int amPort) {
+ synchronized (umbilicalUgi) {
+ if (umbilicalUgi.get() == null) {
+ UserGroupInformation taskOwner =
+ UserGroupInformation.createRemoteUser(umbilicalUser);
+ final InetSocketAddress address =
+ NetUtils.createSocketAddrForHost(amHost, amPort);
+ SecurityUtil.setTokenService(appToken, address);
+ taskOwner.addToken(appToken);
+ umbilicalUgi.set(taskOwner);
+ }
+ }
+ }
+
+ public UserGroupInformation getUmbilicalUgi() {
+ synchronized (umbilicalUgi) {
+ return umbilicalUgi.get();
+ }
+ }
}
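The setupUmbilicalUgi/getUmbilicalUgi pair above is a lazy, once-per-query initialization: the first fragment to register creates the UGI, binds the AM token to the AM address, and every later fragment of the same query reuses that identity. A minimal standalone sketch of the once-only idiom (hypothetical names, not Hive API; synchronizing on the AtomicReference itself guarantees the initializer runs at most once, which matters here because setting the token service is a side effect):

    import java.util.concurrent.atomic.AtomicReference;
    import java.util.function.Supplier;

    final class LazyOnce<T> {
      private final AtomicReference<T> ref = new AtomicReference<>();

      // First caller runs init and installs the value; later callers reuse it.
      T getOrInit(Supplier<T> init) {
        synchronized (ref) {          // same idiom as QueryInfo: lock on the reference
          if (ref.get() == null) {
            ref.set(init.get());
          }
          return ref.get();
        }
      }
    }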
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
index 9eaddd2..5cf3a38 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/QueryTracker.java
@@ -139,7 +139,7 @@ public class QueryTracker extends AbstractService {
QueryFragmentInfo registerFragment(QueryIdentifier queryIdentifier, String appIdString, String dagIdString,
String dagName, String hiveQueryIdString, int dagIdentifier, String vertexName, int fragmentNumber, int attemptNumber,
String user, SignableVertexSpec vertex, Token<JobTokenIdentifier> appToken,
- String fragmentIdString, LlapTokenInfo tokenInfo) throws IOException {
+ String fragmentIdString, LlapTokenInfo tokenInfo, String amHost, int amPort) throws IOException {
ReadWriteLock dagLock = getDagLock(queryIdentifier);
// Note: This is a readLock to prevent a race with queryComplete. Operations
@@ -174,6 +174,8 @@ public class QueryTracker extends AbstractService {
if (old != null) {
queryInfo = old;
} else {
+ // Ensure the UGI is set up once.
+ queryInfo.setupUmbilicalUgi(vertex.getTokenIdentifier(), appToken, amHost, amPort);
isExistingQueryInfo = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
index 4b677aa..8fce546 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/TaskRunnerCallable.java
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.io.IOContextMap;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.log4j.MDC;
@@ -65,6 +64,7 @@ import org.apache.tez.runtime.task.TezTaskRunner2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import javax.net.SocketFactory;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
@@ -116,7 +116,8 @@ public class TaskRunnerCallable extends CallableWithNdc<TaskRunner2Result> {
private final SignableVertexSpec vertex;
private final TezEvent initialEvent;
private final SchedulerFragmentCompletingListener completionListener;
- private UserGroupInformation taskUgi;
+ private UserGroupInformation fsTaskUgi;
+ private final SocketFactory socketFactory;
@VisibleForTesting
public TaskRunnerCallable(SubmitWorkRequestProto request, QueryFragmentInfo fragmentInfo,
@@ -125,7 +126,8 @@ public class TaskRunnerCallable extends CallableWithNdc<TaskRunner2Result> {
LlapDaemonExecutorMetrics metrics, KilledTaskHandler killedTaskHandler,
FragmentCompletionHandler fragmentCompleteHandler, HadoopShim tezHadoopShim,
TezTaskAttemptID attemptId, SignableVertexSpec vertex, TezEvent initialEvent,
- UserGroupInformation taskUgi, SchedulerFragmentCompletingListener completionListener) {
+ UserGroupInformation fsTaskUgi, SchedulerFragmentCompletingListener completionListener,
+ SocketFactory socketFactory) {
this.request = request;
this.fragmentInfo = fragmentInfo;
this.conf = conf;
@@ -153,8 +155,9 @@ public class TaskRunnerCallable extends CallableWithNdc<TaskRunner2Result> {
this.fragmentCompletionHanler = fragmentCompleteHandler;
this.tezHadoopShim = tezHadoopShim;
this.initialEvent = initialEvent;
- this.taskUgi = taskUgi;
+ this.fsTaskUgi = fsTaskUgi;
this.completionListener = completionListener;
+ this.socketFactory = socketFactory;
}
public long getStartTime() {
@@ -196,27 +199,27 @@ public class TaskRunnerCallable extends CallableWithNdc<TaskRunner2Result> {
// TODO Consolidate this code with TezChild.
runtimeWatch.start();
- if (taskUgi == null) {
- taskUgi = UserGroupInformation.createRemoteUser(vertex.getUser());
+ if (fsTaskUgi == null) {
+ fsTaskUgi = UserGroupInformation.createRemoteUser(vertex.getUser());
}
- taskUgi.addCredentials(credentials);
+ fsTaskUgi.addCredentials(credentials);
Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<>();
serviceConsumerMetadata.put(TezConstants.TEZ_SHUFFLE_HANDLER_SERVICE_ID,
TezCommonUtils.convertJobTokenToBytes(jobToken));
Multimap<String, String> startedInputsMap = createStartedInputMap(vertex);
- UserGroupInformation taskOwner =
- UserGroupInformation.createRemoteUser(vertex.getTokenIdentifier());
+ final UserGroupInformation taskOwner = fragmentInfo.getQueryInfo().getUmbilicalUgi();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("taskOwner hashCode:" + taskOwner.hashCode());
+ }
final InetSocketAddress address =
NetUtils.createSocketAddrForHost(request.getAmHost(), request.getAmPort());
- SecurityUtil.setTokenService(jobToken, address);
- taskOwner.addToken(jobToken);
umbilical = taskOwner.doAs(new PrivilegedExceptionAction<LlapTaskUmbilicalProtocol>() {
@Override
public LlapTaskUmbilicalProtocol run() throws Exception {
return RPC.getProxy(LlapTaskUmbilicalProtocol.class,
- LlapTaskUmbilicalProtocol.versionID, address, conf);
+ LlapTaskUmbilicalProtocol.versionID, address, taskOwner, conf, socketFactory);
}
});
@@ -238,7 +241,7 @@ public class TaskRunnerCallable extends CallableWithNdc<TaskRunner2Result> {
try {
synchronized (this) {
if (shouldRunTask) {
- taskRunner = new TezTaskRunner2(conf, taskUgi, fragmentInfo.getLocalDirs(),
+ taskRunner = new TezTaskRunner2(conf, fsTaskUgi, fragmentInfo.getLocalDirs(),
taskSpec,
vertex.getQueryIdentifier().getAppAttemptNumber(),
serviceConsumerMetadata, envMap, startedInputsMap, taskReporter, executor,
@@ -260,7 +263,7 @@ public class TaskRunnerCallable extends CallableWithNdc<TaskRunner2Result> {
isCompleted.set(true);
return result;
} finally {
- FileSystem.closeAllForUGI(taskUgi);
+ FileSystem.closeAllForUGI(fsTaskUgi);
LOG.info("ExecutionTime for Container: " + request.getContainerIdString() + "=" +
runtimeWatch.stop().elapsedMillis());
if (LOG.isDebugEnabled()) {
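Why a shared UGI and SocketFactory pay off here: Hadoop's IPC layer pools connections, and the pooling key compares the UGI ticket by identity, while the client cache is keyed by the SocketFactory instance. The old code created a fresh taskOwner UGI per fragment, so no two fragments could ever share a connection to the AM; with the per-query UGI from QueryInfo and the daemon-wide factory, fragments of one query resolve to the same pooled connection. A simplified view of the key (an assumption about o.a.h.ipc internals, for illustration only):

    import java.net.InetSocketAddress;
    import org.apache.hadoop.security.UserGroupInformation;

    // Assumed, simplified shape of the connection-pooling key.
    final class ConnKey {
      final InetSocketAddress address;     // AM host:port
      final Class<?> protocol;             // e.g. LlapTaskUmbilicalProtocol
      final UserGroupInformation ticket;   // identity-compared: fresh UGI => new connection
      ConnKey(InetSocketAddress a, Class<?> p, UserGroupInformation t) {
        this.address = a; this.protocol = p; this.ticket = t;
      }
    }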
http://git-wip-us.apache.org/repos/asf/hive/blob/b8d7192f/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java
----------------------------------------------------------------------
diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java
index 5dc1be5..ae3328a 100644
--- a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java
+++ b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/impl/TaskExecutorTestHelpers.java
@@ -44,6 +44,8 @@ import org.apache.tez.runtime.task.TaskRunner2Result;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import javax.net.SocketFactory;
+
public class TaskExecutorTestHelpers {
private static final Logger LOG = LoggerFactory.getLogger(TestTaskExecutorService.class);
@@ -184,7 +186,7 @@ public class TaskExecutorTestHelpers {
mock(KilledTaskHandler.class), mock(
FragmentCompletionHandler.class), new DefaultHadoopShim(), null,
requestProto.getWorkSpec().getVertex(), initialEvent, null, mock(
- SchedulerFragmentCompletingListener.class));
+ SchedulerFragmentCompletingListener.class), mock(SocketFactory.class));
this.workTime = workTime;
this.canFinish = canFinish;
}
[28/50] [abbrv] hive git commit: HIVE-15953 : better error messages
for LLAP registry properties (Sergey Shelukhin,
reviewed by Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-15953 : better error messages for LLAP registry properties (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af606ffd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af606ffd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af606ffd
Branch: refs/heads/hive-14535
Commit: af606ffd4bbd04a6def0132c8cfcba3d9067e44c
Parents: 32ab6c0
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Feb 21 13:45:04 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Feb 21 13:45:04 2017 -0800
----------------------------------------------------------------------
.../llap/registry/impl/LlapZookeeperRegistryImpl.java | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/af606ffd/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
----------------------------------------------------------------------
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
index 7ae80b0..0f8ff66 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapZookeeperRegistryImpl.java
@@ -451,9 +451,14 @@ public class LlapZookeeperRegistryImpl implements ServiceRegistry {
AddressTypes.ADDRESS_PORT_FIELD));
this.serviceAddress =
RegistryTypeUtils.getAddressField(services.addresses.get(0), AddressTypes.ADDRESS_URI);
- int memory = Integer.parseInt(srv.get(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname));
- int vCores = Integer.parseInt(srv.get(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname));
- this.resource = Resource.newInstance(memory, vCores);
+ String memStr = srv.get(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, "");
+ String coreStr = srv.get(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, "");
+ try {
+ this.resource = Resource.newInstance(Integer.parseInt(memStr), Integer.parseInt(coreStr));
+ } catch (NumberFormatException ex) {
+ throw new IOException("Invalid resource configuration for a LLAP node: memory "
+ + memStr + ", vcores " + coreStr);
+ }
}
@Override
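The failure mode this fixes: with a missing property, Integer.parseInt(null) throws a bare NumberFormatException whose message is just "null", naming neither the key nor the node. Reading with an empty-string default and wrapping both parses in one try block produces an error that shows the raw values. A hedged sketch of the same defensive-parse idiom (plain Java, hypothetical props map):

    import java.io.IOException;
    import java.util.Map;

    static int parseRequired(Map<String, String> props, String key) throws IOException {
      String raw = props.getOrDefault(key, "");  // "" instead of null keeps the raw text printable
      try {
        return Integer.parseInt(raw);
      } catch (NumberFormatException ex) {
        // Name the key and the offending value, not just "NumberFormatException: null".
        throw new IOException("Invalid value for " + key + ": '" + raw + "'", ex);
      }
    }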
[43/50] [abbrv] hive git commit: HIVE-16012 : BytesBytes hash table -
better capacity exhaustion handling (Sergey Shelukhin, reviewed by Wei Zheng)
Posted by se...@apache.org.
HIVE-16012 : BytesBytes hash table - better capacity exhaustion handling (Sergey Shelukhin, reviewed by Wei Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6ca79e3a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6ca79e3a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6ca79e3a
Branch: refs/heads/hive-14535
Commit: 6ca79e3aa2b5a8812e5c4aaee80c4115e2b9def8
Parents: e941e63
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Feb 23 11:25:18 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Feb 23 11:25:18 2017 -0800
----------------------------------------------------------------------
.../hive/ql/exec/persistence/BytesBytesMultiHashMap.java | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6ca79e3a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index 6b89e98..04e24bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -26,6 +26,7 @@ import java.util.TreeMap;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.debug.Utils;
import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -553,6 +554,12 @@ public final class BytesBytesMultiHashMap {
if (capacity <= 0) {
throw new AssertionError("Invalid capacity " + capacity);
}
+ if (capacity > Integer.MAX_VALUE) {
+ throw new RuntimeException("Attempting to expand the hash table to " + capacity
+ + " that overflows maximum array size. For this query, you may want to disable "
+ + ConfVars.HIVEDYNAMICPARTITIONHASHJOIN.varname + " or reduce "
+ + ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD.varname);
+ }
}
/**
@@ -715,8 +722,7 @@ public final class BytesBytesMultiHashMap {
}
private void expandAndRehash() {
- long capacity = refs.length << 1;
- expandAndRehashImpl(capacity);
+ expandAndRehashImpl(((long)refs.length) << 1);
}
private void expandAndRehashImpl(long capacity) {
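The second hunk is the subtle part: refs.length is an int, so refs.length << 1 is evaluated as an int shift and overflows to a negative number once the array reaches 2^30 entries; only afterwards is it widened to long. Casting before the shift keeps the doubled capacity correct, which is what allows the new guard above to fire with an actionable message instead of the old AssertionError on a negative capacity. A two-line demonstration:

    int len = 1 << 30;               // 1073741824 slots
    long wrong = len << 1;           // int shift first:  -2147483648
    long right = ((long) len) << 1;  // widen first:       2147483648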
[08/50] [abbrv] hive git commit: HIVE-15950 Make DbTxnManager use
Metastore client consistently with callers (Eugene Koifman,
reviewed by Vaibhav Gumashta)
Posted by se...@apache.org.
HIVE-15950 Make DbTxnManager use Metastore client consistently with callers (Eugene Koifman, reviewed by Vaibhav Gumashta)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb4d8db5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb4d8db5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb4d8db5
Branch: refs/heads/hive-14535
Commit: bb4d8db5093984b94d3bb996e286e10a0dc2bef3
Parents: 3485d02
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Fri Feb 17 09:32:15 2017 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Fri Feb 17 09:32:15 2017 -0800
----------------------------------------------------------------------
.../hive/metastore/RetryingMetaStoreClient.java | 4 +-
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 28 ++--
.../hadoop/hive/ql/lockmgr/DbTxnManager.java | 144 +++++++------------
3 files changed, 68 insertions(+), 108 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4d8db5/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
index a6545a9..d3e5f7e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
@@ -215,8 +215,8 @@ public class RetryingMetaStoreClient implements InvocationHandler {
throw caughtException;
}
retriesMade++;
- LOG.warn("MetaStoreClient lost connection. Attempting to reconnect.",
- caughtException);
+ LOG.warn("MetaStoreClient lost connection. Attempting to reconnect (" + retriesMade + " of " +
+ retryLimit + ") after " + retryDelaySeconds + "s. " + method.getName(), caughtException);
Thread.sleep(retryDelaySeconds * 1000);
}
return ret;
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4d8db5/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
index 529e64c..c3725ad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.lockmgr;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.SynchronizedMetaStoreClient;
import org.apache.hadoop.hive.ql.exec.DDLTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,10 +42,10 @@ import java.util.concurrent.TimeUnit;
/**
* An implementation of HiveLockManager for use with {@link org.apache.hadoop.hive.ql.lockmgr.DbTxnManager}.
- * Note, this lock manager is not meant to stand alone. It cannot be used
- * without the DbTxnManager.
+ * Note: this lock manager is not meant to stand alone. It cannot be used without the DbTxnManager.
+ * See {@link DbTxnManager#getMS()} for important concurrency/metastore access notes.
*/
-public class DbLockManager implements HiveLockManager{
+public final class DbLockManager implements HiveLockManager{
static final private String CLASS_NAME = DbLockManager.class.getName();
static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
@@ -54,14 +53,14 @@ public class DbLockManager implements HiveLockManager{
private long MAX_SLEEP;
//longer term we should always have a txn id and then we won't need to track locks here at all
private Set<DbHiveLock> locks;
- private SynchronizedMetaStoreClient client;
private long nextSleep = 50;
private final HiveConf conf;
+ private final DbTxnManager txnManager;
- DbLockManager(SynchronizedMetaStoreClient client, HiveConf conf) {
+ DbLockManager(HiveConf conf, DbTxnManager txnManager) {
locks = new HashSet<>();
- this.client = client;
this.conf = conf;
+ this.txnManager = txnManager;
}
@Override
@@ -100,7 +99,7 @@ public class DbLockManager implements HiveLockManager{
int maxNumWaits = Math.max(0, conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES));
try {
LOG.info("Requesting: queryId=" + queryId + " " + lock);
- LockResponse res = client.lock(lock);
+ LockResponse res = txnManager.getMS().lock(lock);
//link lockId to queryId
LOG.info("Response to queryId=" + queryId + " " + res);
if(!isBlocking) {
@@ -112,8 +111,7 @@ public class DbLockManager implements HiveLockManager{
long startRetry = System.currentTimeMillis();
while (res.getState() == LockState.WAITING && numRetries++ < maxNumWaits) {
backoff();
- res = client.checkLock(res.getLockid());
-
+ res = txnManager.getMS().checkLock(res.getLockid());
}
long retryDuration = System.currentTimeMillis() - startRetry;
DbHiveLock hl = new DbHiveLock(res.getLockid(), queryId, lock.getTxnid());
@@ -203,7 +201,7 @@ public class DbLockManager implements HiveLockManager{
*/
LockState checkLock(long extLockId) throws LockException {
try {
- return client.checkLock(extLockId).getState();
+ return txnManager.getMS().checkLock(extLockId).getState();
} catch (TException e) {
throw new LockException(ErrorMsg.METASTORE_COMMUNICATION_FAILED.getMsg(),
e);
@@ -216,7 +214,7 @@ public class DbLockManager implements HiveLockManager{
boolean removed = false;
try {
LOG.debug("Unlocking " + hiveLock);
- client.unlock(lockId);
+ txnManager.getMS().unlock(lockId);
//important to remove after unlock() in case it fails
removed = locks.remove(hiveLock);
Metrics metrics = MetricsFactory.getInstance();
@@ -283,7 +281,7 @@ public class DbLockManager implements HiveLockManager{
public ShowLocksResponse getLocks(ShowLocksRequest showLocksRequest) throws LockException {
try {
- return client.showLocks(showLocksRequest);
+ return txnManager.getMS().showLocks(showLocksRequest);
} catch (TException e) {
throw new LockException(ErrorMsg.METASTORE_COMMUNICATION_FAILED.getMsg(), e);
}
@@ -354,8 +352,8 @@ public class DbLockManager implements HiveLockManager{
/**
* Clear the memory of the locks in this object. This won't clear the locks from the database.
* It is for use with
- * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient, org.apache.hadoop.hive.conf.HiveConf)} .commitTxn} and
- * {@link #DbLockManager(org.apache.hadoop.hive.metastore.IMetaStoreClient, org.apache.hadoop.hive.conf.HiveConf)} .rollbackTxn}.
+ * {@link DbTxnManager#commitTxn()} and
+ * {@link DbTxnManager#rollbackTxn()}.
*/
void clearLocalLockRecords() {
locks.clear();
http://git-wip-us.apache.org/repos/asf/hive/blob/bb4d8db5/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
index a985eb1..62f7c5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
@@ -19,7 +19,7 @@ package org.apache.hadoop.hive.ql.lockmgr;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.metastore.SynchronizedMetaStoreClient;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hive.common.util.ShutdownHookManager;
import org.slf4j.Logger;
@@ -52,21 +52,23 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
- * An implementation of HiveTxnManager that stores the transactions in the
- * metastore database.
+ * An implementation of HiveTxnManager that stores the transactions in the metastore database.
+ * There should be 1 instance of {@link DbTxnManager} per {@link org.apache.hadoop.hive.ql.session.SessionState}
+ * with a single thread accessing it at a time, with the exception of the {@link #heartbeat()} method.
+ * The latter may (usually will) be called from a timer thread.
+ * See {@link #getMS()} for more important concurrency/metastore access notes.
*/
-public class DbTxnManager extends HiveTxnManagerImpl {
+public final class DbTxnManager extends HiveTxnManagerImpl {
static final private String CLASS_NAME = DbTxnManager.class.getName();
static final private Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
- private DbLockManager lockMgr = null;
- private SynchronizedMetaStoreClient client = null;
+ private volatile DbLockManager lockMgr = null;
/**
* The Metastore NEXT_TXN_ID.NTXN_NEXT is initialized to 1; it contains the next available
* transaction id. Thus 1 is the first transaction id.
*/
- private long txnId = 0;
+ private volatile long txnId = 0;
/**
* assigns a unique monotonically increasing ID to each statement
* which is part of an open transaction. This is used by storage
@@ -84,33 +86,31 @@ public class DbTxnManager extends HiveTxnManagerImpl {
private ScheduledFuture<?> heartbeatTask = null;
private Runnable shutdownRunner = null;
private static final int SHUTDOWN_HOOK_PRIORITY = 0;
-
- // SynchronizedMetaStoreClient object per heartbeater thread.
- private static ThreadLocal<SynchronizedMetaStoreClient> threadLocalMSClient =
- new ThreadLocal<SynchronizedMetaStoreClient>() {
-
- @Override
- protected SynchronizedMetaStoreClient initialValue() {
- return null;
- }
-
- @Override
- public synchronized void remove() {
- SynchronizedMetaStoreClient client = this.get();
- if (client != null) {
- client.close();
- }
- super.remove();
- }
- };
-
- private static AtomicInteger heartbeaterMSClientCount = new AtomicInteger(0);
- private static int heartbeaterThreadPoolSize = 0;
-
- private static SynchronizedMetaStoreClient getThreadLocalMSClient() {
- return threadLocalMSClient.get();
+ /**
+ * We do this on every call to make sure the TM uses the same MS connection as the caller (Driver,
+ * SemanticAnalyzer, etc). {@code Hive} instances are cached using ThreadLocal and
+ * {@link IMetaStoreClient} is cached within {@code Hive} with additional logic. Furthermore, this
+ * ensures that multiple threads are not sharing the same Thrift client (which could happen
+ * if we had cached {@link IMetaStoreClient} here).
+ *
+ * ThreadLocal gets cleaned up automatically when its thread goes away
+ * https://docs.oracle.com/javase/7/docs/api/java/lang/ThreadLocal.html. This is especially
+ * important for threads created by {@link #heartbeatExecutorService} threads.
+ *
+ * Embedded {@link DbLockManager} follows the same logic.
+ * @return IMetaStoreClient
+ * @throws LockException on any errors
+ */
+ IMetaStoreClient getMS() throws LockException {
+ try {
+ return Hive.get(conf).getMSC();
+ }
+ catch(HiveException|MetaException e) {
+ String msg = "Unable to reach Hive Metastore: " + e.getMessage();
+ LOG.error(msg, e);
+ throw new LockException(e);
+ }
}
-
DbTxnManager() {
shutdownRunner = new Runnable() {
@Override
@@ -148,7 +148,7 @@ public class DbTxnManager extends HiveTxnManagerImpl {
throw new LockException("Transaction already opened. " + JavaUtils.txnIdToString(txnId));
}
try {
- txnId = client.openTxn(user);
+ txnId = getMS().openTxn(user);
statementId = 0;
LOG.debug("Opened " + JavaUtils.txnIdToString(txnId));
ctx.setHeartbeater(startHeartbeat(delay));
@@ -158,11 +158,15 @@ public class DbTxnManager extends HiveTxnManagerImpl {
}
}
+ /**
+ * We don't expect multiple threads to call this method concurrently, but {@link #lockMgr} may
+ * be read by a different thread than the one writing it, thus it's {@code volatile}.
+ */
@Override
public HiveLockManager getLockManager() throws LockException {
init();
if (lockMgr == null) {
- lockMgr = new DbLockManager(client, conf);
+ lockMgr = new DbLockManager(conf, this);
}
return lockMgr;
}
@@ -388,7 +392,7 @@ public class DbTxnManager extends HiveTxnManagerImpl {
lockMgr.clearLocalLockRecords();
stopHeartbeat();
LOG.debug("Committing txn " + JavaUtils.txnIdToString(txnId));
- client.commitTxn(txnId);
+ getMS().commitTxn(txnId);
} catch (NoSuchTxnException e) {
LOG.error("Metastore could not find " + JavaUtils.txnIdToString(txnId));
throw new LockException(e, ErrorMsg.TXN_NO_SUCH_TRANSACTION, JavaUtils.txnIdToString(txnId));
@@ -414,7 +418,7 @@ public class DbTxnManager extends HiveTxnManagerImpl {
lockMgr.clearLocalLockRecords();
stopHeartbeat();
LOG.debug("Rolling back " + JavaUtils.txnIdToString(txnId));
- client.rollbackTxn(txnId);
+ getMS().rollbackTxn(txnId);
} catch (NoSuchTxnException e) {
LOG.error("Metastore could not find " + JavaUtils.txnIdToString(txnId));
throw new LockException(e, ErrorMsg.TXN_NO_SUCH_TRANSACTION, JavaUtils.txnIdToString(txnId));
@@ -460,29 +464,11 @@ public class DbTxnManager extends HiveTxnManagerImpl {
for (HiveLock lock : locks) {
long lockId = ((DbLockManager.DbHiveLock)lock).lockId;
try {
- // Get the threadlocal metastore client for the heartbeat calls.
- SynchronizedMetaStoreClient heartbeaterClient = getThreadLocalMSClient();
- if (heartbeaterClient == null) {
- Hive db;
- try {
- db = Hive.get(conf);
- // Create a new threadlocal synchronized metastore client for use in heartbeater threads.
- // This makes the concurrent use of heartbeat thread safe, and won't cause transaction
- // abort due to a long metastore client call blocking the heartbeat call.
- heartbeaterClient = new SynchronizedMetaStoreClient(db.getMSC());
- threadLocalMSClient.set(heartbeaterClient);
- } catch (HiveException e) {
- LOG.error("Unable to create new metastore client for heartbeating", e);
- throw new LockException(e);
- }
- // Increment the threadlocal metastore client count
- if (heartbeaterMSClientCount.incrementAndGet() >= heartbeaterThreadPoolSize) {
- LOG.warn("The number of heartbeater metastore clients - + "
- + heartbeaterMSClientCount.get() + ", has exceeded the max limit - "
- + heartbeaterThreadPoolSize);
- }
- }
- heartbeaterClient.heartbeat(txnId, lockId);
+ /**
+ * This relies on the ThreadLocal caching, which implies that the same {@link IMetaStoreClient},
+ * in particular the Thrift connection it uses is never shared between threads
+ */
+ getMS().heartbeat(txnId, lockId);
} catch (NoSuchLockException e) {
LOG.error("Unable to find lock " + JavaUtils.lockIdToString(lockId));
throw new LockException(e, ErrorMsg.LOCK_NO_SUCH_LOCK, JavaUtils.lockIdToString(lockId));
@@ -554,7 +540,7 @@ public class DbTxnManager extends HiveTxnManagerImpl {
public ValidTxnList getValidTxns() throws LockException {
init();
try {
- return client.getValidTxns(txnId);
+ return getMS().getValidTxns(txnId);
} catch (TException e) {
throw new LockException(ErrorMsg.METASTORE_COMMUNICATION_FAILED.getMsg(),
e);
@@ -598,21 +584,10 @@ public class DbTxnManager extends HiveTxnManagerImpl {
}
private void init() throws LockException {
- if (client == null) {
- if (conf == null) {
- throw new RuntimeException("Must call setHiveConf before any other " +
- "methods.");
- }
- try {
- Hive db = Hive.get(conf);
- client = new SynchronizedMetaStoreClient(db.getMSC());
- initHeartbeatExecutorService();
- } catch (MetaException e) {
- throw new LockException(ErrorMsg.METASTORE_COULD_NOT_INITIATE.getMsg(), e);
- } catch (HiveException e) {
- throw new LockException(ErrorMsg.METASTORE_COULD_NOT_INITIATE.getMsg(), e);
- }
+ if (conf == null) {
+ throw new RuntimeException("Must call setHiveConf before any other methods.");
}
+ initHeartbeatExecutorService();
}
private synchronized void initHeartbeatExecutorService() {
@@ -620,10 +595,9 @@ public class DbTxnManager extends HiveTxnManagerImpl {
&& !heartbeatExecutorService.isTerminated()) {
return;
}
- heartbeaterThreadPoolSize =
- conf.getIntVar(HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE);
heartbeatExecutorService =
- Executors.newScheduledThreadPool(heartbeaterThreadPoolSize, new ThreadFactory() {
+ Executors.newScheduledThreadPool(
+ conf.getIntVar(HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE), new ThreadFactory() {
private final AtomicInteger threadCounter = new AtomicInteger();
@Override
@@ -635,22 +609,10 @@ public class DbTxnManager extends HiveTxnManagerImpl {
}
public static class HeartbeaterThread extends Thread {
- public HeartbeaterThread(Runnable target, String name) {
+ HeartbeaterThread(Runnable target, String name) {
super(target, name);
setDaemon(true);
}
-
- @Override
- /**
- * We're overriding finalize so that we can do an orderly cleanup of resources held by
- * the threadlocal metastore client.
- */
- protected void finalize() throws Throwable {
- threadLocalMSClient.remove();
- // Adjust the metastore client count
- DbTxnManager.heartbeaterMSClientCount.decrementAndGet();
- super.finalize();
- }
}
@Override
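The net effect of the patch: DbTxnManager no longer owns a client field at all. Every operation, heartbeats included, calls getMS(), which resolves to the Hive object cached per thread, so each thread transparently gets and reuses its own Thrift connection and nothing is shared across threads. A minimal standalone sketch of that resolve-per-call idiom (hypothetical Client type standing in for IMetaStoreClient):

    final class Client {
      static Client connect() { return new Client(); }  // stand-in for opening a Thrift connection
      void heartbeat(long txnId, long lockId) { /* RPC call */ }
    }

    final class PerThreadClients {
      // One Client per thread, created lazily on first use.
      private static final ThreadLocal<Client> CLIENT =
          ThreadLocal.withInitial(Client::connect);

      // Resolve on every call instead of caching in a field, so a
      // connection can never leak across threads.
      static Client getClient() {
        return CLIENT.get();
      }
    }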
[42/50] [abbrv] hive git commit: HIVE-12492: MapJoin: 4 million
unique integers seems to be a probe plateau (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-12492: MapJoin: 4 million unique integers seems to be a probe plateau (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e941e63c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e941e63c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e941e63c
Branch: refs/heads/hive-14535
Commit: e941e63c7d2830395e0b535e9b1a3c33d6e5b652
Parents: 759766e
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Feb 23 08:50:20 2017 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Feb 23 18:50:14 2017 +0000
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 5 +
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 106 +++-
.../stats/annotation/StatsRulesProcFactory.java | 44 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 6 +-
.../queries/clientpositive/join_max_hashtable.q | 37 ++
.../llap/join_max_hashtable.q.out | 490 +++++++++++++++++++
7 files changed, 678 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0b315e1..46be3fb 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1422,6 +1422,11 @@ public class HiveConf extends Configuration {
"This controls how many partitions can be scanned for each partitioned table.\n" +
"The default value \"-1\" means no limit. (DEPRECATED: Please use " + ConfVars.METASTORE_LIMIT_PARTITION_REQUEST + " in the metastore instead.)"),
+ HIVECONVERTJOINMAXENTRIESHASHTABLE("hive.auto.convert.join.hashtable.max.entries", 4194304L,
+ "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" +
+ "However, if it is on, and the predicated number of entries in hashtable for a given join \n" +
+ "input is larger than this number, the join will not be converted to a mapjoin. \n" +
+ "The value \"-1\" means no limit."),
HIVEHASHTABLEKEYCOUNTADJUSTMENT("hive.hashtable.key.count.adjustment", 1.0f,
"Adjustment to mapjoin hashtable size derived from table and column statistics; the estimate" +
" of the number of keys is divided by this value. If the value is 0, statistics are not used" +
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index d344464..5b30157 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -501,6 +501,7 @@ minillaplocal.query.files=acid_globallimit.q,\
join1.q,\
join_acid_non_acid.q,\
join_filters.q,\
+ join_max_hashtable.q,\
join_nulls.q,\
join_nullsafe.q,\
leftsemijoin_mr.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 93e3631..e68618a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -19,7 +19,6 @@
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -53,6 +52,7 @@ import org.apache.hadoop.hive.ql.parse.GenTezUtils;
import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -63,6 +63,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -117,7 +118,7 @@ public class ConvertJoinMapJoin implements NodeProcessor {
numBuckets = 1;
}
LOG.info("Estimated number of buckets " + numBuckets);
- int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets, false, maxSize);
+ int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets, false, maxSize, true);
if (mapJoinConversionPos < 0) {
Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
if (retval == null) {
@@ -141,7 +142,7 @@ public class ConvertJoinMapJoin implements NodeProcessor {
// check if we can convert to map join no bucket scaling.
LOG.info("Convert to non-bucketed map join");
if (numBuckets != 1) {
- mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1, false, maxSize);
+ mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1, false, maxSize, true);
}
if (mapJoinConversionPos < 0) {
// we are just converting to a common merge join operator. The shuffle
@@ -519,8 +520,22 @@ public class ConvertJoinMapJoin implements NodeProcessor {
return false;
}
+ /**
+ * Obtain big table position for join.
+ *
+ * @param joinOp join operator
+ * @param context optimization context
+ * @param buckets bucket count for Bucket Map Join conversion consideration or reduce count
+ * for Dynamic Hash Join conversion consideration
+ * @param skipJoinTypeChecks whether to skip join type checking
+ * @param maxSize size threshold for Map Join conversion
+ * @param checkHashTableEntries whether to check threshold for distinct keys in hash table for Map Join
+ * @return big table position, or -1 if it cannot be determined
+ * @throws SemanticException
+ */
public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext context,
- int buckets, boolean skipJoinTypeChecks, long maxSize) throws SemanticException {
+ int buckets, boolean skipJoinTypeChecks, long maxSize, boolean checkHashTableEntries)
+ throws SemanticException {
if (!skipJoinTypeChecks) {
/*
* HIVE-9038: Join tests fail in tez when we have more than 1 join on the same key and there is
@@ -628,10 +643,20 @@ public class ConvertJoinMapJoin implements NodeProcessor {
// We are replacing the current big table with a new one, thus
// we need to count the current one as a map table then.
totalSize += bigInputStat.getDataSize();
+ // Check if number of distinct keys is larger than given max
+ // number of entries for HashMap. If it is, we do not convert.
+ if (checkHashTableEntries && !checkNumberOfEntriesForHashTable(joinOp, bigTablePosition, context)) {
+ return -1;
+ }
} else if (!selectedBigTable) {
// This is not the first table and we are not using it as big table,
// in fact, we're adding this table as a map table
totalSize += inputSize;
+ // Check if number of distinct keys is larger than given max
+ // number of entries for HashMap. If it is, we do not convert.
+ if (checkHashTableEntries && !checkNumberOfEntriesForHashTable(joinOp, pos, context)) {
+ return -1;
+ }
}
if (totalSize/buckets > maxSize) {
@@ -905,8 +930,8 @@ public class ConvertJoinMapJoin implements NodeProcessor {
int numReducers = estimateNumBuckets(joinOp, false);
LOG.info("Try dynamic partitioned hash join with estimated " + numReducers + " reducers");
int bigTablePos = getMapJoinConversionPos(joinOp, context, numReducers, false,
- context.conf.getLongVar(
- HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD));
+ context.conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD),
+ false);
if (bigTablePos >= 0) {
// Now that we have the big table index, get real numReducers value based on big table RS
ReduceSinkOperator bigTableParentRS =
@@ -951,7 +976,7 @@ public class ConvertJoinMapJoin implements NodeProcessor {
}
int pos = getMapJoinConversionPos(joinOp, context, estimateNumBuckets(joinOp, false),
- true, Long.MAX_VALUE);
+ true, Long.MAX_VALUE, false);
if (pos < 0) {
LOG.info("Could not get a valid join position. Defaulting to position 0");
pos = 0;
@@ -961,4 +986,71 @@ public class ConvertJoinMapJoin implements NodeProcessor {
LOG.info("Fallback to common merge join operator");
convertJoinSMBJoin(joinOp, context, pos, 0, false);
}
+
+ /* Returns true if it passes the test, false otherwise. */
+ private boolean checkNumberOfEntriesForHashTable(JoinOperator joinOp, int position,
+ OptimizeTezProcContext context) {
+ long max = HiveConf.getLongVar(context.parseContext.getConf(),
+ HiveConf.ConfVars.HIVECONVERTJOINMAXENTRIESHASHTABLE);
+ if (max < 1) {
+ // Max is disabled, we can safely return true
+ return true;
+ }
+ // Calculate number of different entries and evaluate
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(position);
+ List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames());
+ Statistics inputStats = rsOp.getStatistics();
+ List<ColStatistics> columnStats = new ArrayList<>();
+ for (String key : keys) {
+ ColStatistics cs = inputStats.getColumnStatisticsFromColName(key);
+ if (cs == null) {
+ LOG.debug("Couldn't get statistics for: {}", key);
+ return true;
+ }
+ columnStats.add(cs);
+ }
+ long numRows = inputStats.getNumRows();
+ long estimation = estimateNDV(numRows, columnStats);
+ LOG.debug("Estimated NDV for input {}: {}; Max NDV for MapJoin conversion: {}",
+ position, estimation, max);
+ if (estimation > max) {
+ // Estimation larger than max
+ LOG.debug("Number of different entries for HashTable is greater than the max; "
+ + "we do not converting to MapJoin");
+ return false;
+ }
+ // We can proceed with the conversion
+ return true;
+ }
+
+ private static long estimateNDV(long numRows, List<ColStatistics> columnStats) {
+ // If there is a single column, return the number of distinct values
+ if (columnStats.size() == 1) {
+ return columnStats.get(0).getCountDistint();
+ }
+
+ // The expected number of distinct values when choosing p values
+ // with replacement from n integers is n * (1 - ((n - 1) / n) ^ p).
+ //
+ // If we have several uniformly distributed attributes A1 ... Am
+ // with N1 ... Nm distinct values, they behave as one uniformly
+ // distributed attribute with N1 * ... * Nm distinct values.
+ long n = 1L;
+ for (ColStatistics cs : columnStats) {
+ final long ndv = cs.getCountDistint();
+ if (ndv > 1) {
+ n = StatsUtils.safeMult(n, ndv);
+ }
+ }
+ final double nn = (double) n;
+ final double a = (nn - 1d) / nn;
+ if (a == 1d) {
+ // (nn - 1) / nn rounds to exactly 1 when nn is large.
+ return numRows;
+ }
+ final double v = nn * (1d - Math.pow(a, numRows));
+ // Cap at fact-row-count, because numerical artifacts can cause it
+ // to go a few % over.
+ return Math.min(Math.round(v), numRows);
+ }
}
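To see estimateNDV in action: a join keyed on two columns with 50 and 20 distinct values over 1000 input rows combines to n = 50 * 20 = 1000 potential key values, and the expected number of keys actually drawn is n * (1 - ((n - 1) / n) ^ 1000) ≈ 632, well under the default 4194304 cap, so the conversion proceeds. A quick arithmetic check mirroring the math (not calling Hive):

    long rows = 1000;
    long n = 50L * 20L;                       // combined NDV of the two key columns
    double a = (n - 1d) / n;                  // 0.999
    double expected = n * (1d - Math.pow(a, rows));
    System.out.println(Math.min(Math.round(expected), rows));  // prints 632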
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 61f1374..bdb09a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -51,10 +51,43 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.*;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
-import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
@@ -1409,7 +1442,12 @@ public class StatsRulesProcFactory {
// get the join keys from parent ReduceSink operators
for (int pos = 0; pos < parents.size(); pos++) {
ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
- Statistics parentStats = parent.getStatistics();
+ Statistics parentStats;
+ try {
+ parentStats = parent.getStatistics().clone();
+ } catch (CloneNotSupportedException e) {
+ throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ }
keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf()
.getOutputKeyColumnNames());
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 0da7ea4..e48b609 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1278,7 +1278,11 @@ public class StatsUtils {
ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
if (colStats != null) {
/* If statistics for the column already exist use it. */
- return colStats;
+ try {
+ return colStats.clone();
+ } catch (CloneNotSupportedException e) {
+ return null;
+ }
}
// virtual columns
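Both stats hunks above are the same defensive-copy fix: Statistics and ColStatistics objects are shared between operators during annotation, so a rule that adjusted numbers in place would silently corrupt its parent's estimates for every other consumer. Cloning at the read site isolates each consumer. A hedged sketch of the idiom with a hypothetical Stats type:

    final class Stats implements Cloneable {
      long numRows;
      @Override public Stats clone() throws CloneNotSupportedException {
        return (Stats) super.clone();
      }
    }

    static Stats readIsolated(Stats shared) {
      try {
        return shared.clone();   // callers mutate the copy, never the shared instance
      } catch (CloneNotSupportedException e) {
        return null;             // mirrors StatsUtils: degrade to "no stats available"
      }
    }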
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/ql/src/test/queries/clientpositive/join_max_hashtable.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_max_hashtable.q b/ql/src/test/queries/clientpositive/join_max_hashtable.q
new file mode 100644
index 0000000..9c30a0d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_max_hashtable.q
@@ -0,0 +1,37 @@
+set hive.auto.convert.join=true;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.auto.convert.join.hashtable.max.entries=500;
+
+-- CONVERT
+EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key);
+
+-- CONVERT
+EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value);
+
+set hive.auto.convert.join.hashtable.max.entries=300;
+
+-- CONVERT
+EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key);
+
+-- DO NOT CONVERT
+EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value);
+
+set hive.auto.convert.join.hashtable.max.entries=10;
+
+-- DO NOT CONVERT
+EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key);
+
+-- DO NOT CONVERT
+EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value);
http://git-wip-us.apache.org/repos/asf/hive/blob/e941e63c/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
new file mode 100644
index 0000000..85d45fe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
@@ -0,0 +1,490 @@
+PREHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+ 1 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ HybridGraceHashJoin: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ HybridGraceHashJoin: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT x.key, x.value
+FROM src x JOIN src y ON (x.key = y.key AND x.value = y.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+ 1 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ HybridGraceHashJoin: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
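Read together, the six plans confirm the expectations in the .q file: the converted cases show Map 1 <- Map 2 (BROADCAST_EDGE) with the Map Join Operator running inside the map task, while the non-converted cases fall back to Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) with the join (HybridGraceHashJoin) executed in the reducer.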
[27/50] [abbrv] hive git commit: HIVE-15948 : Failing test:
TestCliDriver, TestSparkCliDriver join31 (Sahil Takiar via Rui Li)
Posted by se...@apache.org.
HIVE-15948 : Failing test: TestCliDriver, TestSparkCliDriver join31 (Sahil Takiar via Rui Li)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/32ab6c08
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/32ab6c08
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/32ab6c08
Branch: refs/heads/hive-14535
Commit: 32ab6c08239712ef1a6ca5ab2e70e3ba25105469
Parents: e17a040
Author: Sahil Takiar <ta...@gmail.com>
Authored: Tue Feb 21 13:10:24 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Feb 21 13:10:24 2017 -0800
----------------------------------------------------------------------
ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q | 2 +-
ql/src/test/queries/clientpositive/join31.q | 2 +-
ql/src/test/queries/clientpositive/multiMapJoin2.q | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/32ab6c08/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
index 8936073..e9e434c 100644
--- a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
+++ b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
@@ -1,6 +1,6 @@
set hive.cbo.returnpath.hiveop=true;
set hive.stats.fetch.column.stats=true;
-set hive.enable.semijoin.conversion=true;
+set hive.optimize.semijoin.conversion=true;
;
set hive.exec.reducers.max = 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/32ab6c08/ql/src/test/queries/clientpositive/join31.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join31.q b/ql/src/test/queries/clientpositive/join31.q
index aa17b4d..62407a0 100644
--- a/ql/src/test/queries/clientpositive/join31.q
+++ b/ql/src/test/queries/clientpositive/join31.q
@@ -1,5 +1,5 @@
set hive.mapred.mode=nonstrict;
-set hive.enable.semijoin.conversion=true;
+set hive.optimize.semijoin.conversion=true;
-- SORT_QUERY_RESULTS
CREATE TABLE dest_j1(key STRING, cnt INT);
http://git-wip-us.apache.org/repos/asf/hive/blob/32ab6c08/ql/src/test/queries/clientpositive/multiMapJoin2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q
index 38ab1a7..14b2eda 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin2.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -3,7 +3,7 @@ set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=6000;
-set hive.enable.semijoin.conversion=true;
+set hive.optimize.semijoin.conversion=true;
-- we will generate one MR job.
EXPLAIN
[26/50] [abbrv] hive git commit: HIVE-15847 : In Progress update
refreshes seem slow (Anishek Agarwal, via Thejas Nair)
Posted by se...@apache.org.
HIVE-15847 : In Progress update refreshes seem slow (Anishek Agarwal, via Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e17a0409
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e17a0409
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e17a0409
Branch: refs/heads/hive-14535
Commit: e17a0409c920bcf75d8915047c3318d621e2ced0
Parents: 7fa8e37
Author: Anishek Agarwal <an...@gmail.com>
Authored: Tue Feb 21 12:48:19 2017 -0800
Committer: Thejas M Nair <th...@hortonworks.com>
Committed: Tue Feb 21 12:48:25 2017 -0800
----------------------------------------------------------------------
.../hadoop/hive/common/log/InPlaceUpdate.java | 1 +
.../hive/ql/exec/tez/monitoring/DAGSummary.java | 12 +-
.../ql/exec/tez/monitoring/RenderStrategy.java | 154 +++++++++++++++++++
.../ql/exec/tez/monitoring/TezJobMonitor.java | 110 ++-----------
4 files changed, 168 insertions(+), 109 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e17a0409/common/src/java/org/apache/hadoop/hive/common/log/InPlaceUpdate.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/log/InPlaceUpdate.java b/common/src/java/org/apache/hadoop/hive/common/log/InPlaceUpdate.java
index bfdb4fa..6db5c18 100644
--- a/common/src/java/org/apache/hadoop/hive/common/log/InPlaceUpdate.java
+++ b/common/src/java/org/apache/hadoop/hive/common/log/InPlaceUpdate.java
@@ -163,6 +163,7 @@ public class InPlaceUpdate {
progressStr,
elapsedTime);
+ reprintLine(SEPARATOR);
reprintLineWithColorAsBold(footer, Ansi.Color.RED);
reprintLine(SEPARATOR);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e17a0409/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/DAGSummary.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/DAGSummary.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/DAGSummary.java
index 5840ad6..1400be4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/DAGSummary.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/DAGSummary.java
@@ -26,16 +26,11 @@ import java.util.*;
class DAGSummary implements PrintSummary {
- private static final int FILE_HEADER_SEPARATOR_WIDTH = InPlaceUpdate.MIN_TERMINAL_WIDTH + 34;
- private static final String FILE_HEADER_SEPARATOR = new String(new char[FILE_HEADER_SEPARATOR_WIDTH]).replace("\0", "-");
-
- private static final String FORMATTING_PATTERN = "%10s %12s %16s %13s %14s %13s %12s %14s %15s";
+ private static final String FILE_HEADER_SEPARATOR = new String(new char[InPlaceUpdate.MIN_TERMINAL_WIDTH]).replace("\0", "-");
+ private static final String FORMATTING_PATTERN = "%10s %17s %14s %14s %15s %16s";
private static final String FILE_HEADER = String.format(
FORMATTING_PATTERN,
"VERTICES",
- "TOTAL_TASKS",
- "FAILED_ATTEMPTS",
- "KILLED_TASKS",
"DURATION(ms)",
"CPU_TIME(ms)",
"GC_TIME(ms)",
@@ -170,9 +165,6 @@ class DAGSummary implements PrintSummary {
return String.format(FORMATTING_PATTERN,
vertexName,
- progress.getTotalTaskCount(),
- progress.getFailedTaskAttemptCount(),
- progress.getKilledTaskAttemptCount(),
secondsFormatter.format((duration)),
commaFormatter.format(cpuTimeMillis),
commaFormatter.format(gcTimeMillis),
http://git-wip-us.apache.org/repos/asf/hive/blob/e17a0409/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/RenderStrategy.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/RenderStrategy.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/RenderStrategy.java
new file mode 100644
index 0000000..bb9a5e7
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/RenderStrategy.java
@@ -0,0 +1,154 @@
+package org.apache.hadoop.hive.ql.exec.tez.monitoring;
+
+import org.apache.hadoop.hive.common.log.InPlaceUpdate;
+import org.apache.hadoop.hive.common.log.ProgressMonitor;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.tez.dag.api.client.DAGStatus;
+import org.apache.tez.dag.api.client.Progress;
+
+import java.io.StringWriter;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+class RenderStrategy {
+
+ interface UpdateFunction {
+ void update(DAGStatus status, Map<String, Progress> vertexProgressMap);
+ }
+
+ private abstract static class BaseUpdateFunction implements UpdateFunction {
+ private static final int PRINT_INTERVAL = 3000;
+
+ final TezJobMonitor monitor;
+ private final PerfLogger perfLogger;
+
+ private long lastPrintTime = 0L;
+ private String lastReport = null;
+
+ BaseUpdateFunction(TezJobMonitor monitor) {
+ this.monitor = monitor;
+ perfLogger = SessionState.getPerfLogger();
+ }
+
+ @Override
+ public void update(DAGStatus status, Map<String, Progress> vertexProgressMap) {
+ renderProgress(monitor.progressMonitor(status, vertexProgressMap));
+ String report = getReport(vertexProgressMap);
+ if (showReport(report)) {
+ renderReport(report);
+ lastReport = report;
+ lastPrintTime = System.currentTimeMillis();
+ }
+ }
+
+ private boolean showReport(String report) {
+ return !report.equals(lastReport)
+ || System.currentTimeMillis() >= lastPrintTime + PRINT_INTERVAL;
+ }
+
+ private String getReport(Map<String, Progress> progressMap) {
+ StringWriter reportBuffer = new StringWriter();
+
+ SortedSet<String> keys = new TreeSet<String>(progressMap.keySet());
+ for (String s : keys) {
+ Progress progress = progressMap.get(s);
+ final int complete = progress.getSucceededTaskCount();
+ final int total = progress.getTotalTaskCount();
+ final int running = progress.getRunningTaskCount();
+ final int failed = progress.getFailedTaskAttemptCount();
+ if (total <= 0) {
+ reportBuffer.append(String.format("%s: -/-\t", s));
+ } else {
+ if (complete == total) {
+ /*
+ * We may have missed the start of the vertex due to the 3 seconds interval
+ */
+ if (!perfLogger.startTimeHasMethod(PerfLogger.TEZ_RUN_VERTEX + s)) {
+ perfLogger.PerfLogBegin(TezJobMonitor.CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
+ }
+
+ perfLogger.PerfLogEnd(TezJobMonitor.CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
+ }
+ if (complete < total && (complete > 0 || running > 0 || failed > 0)) {
+
+ if (!perfLogger.startTimeHasMethod(PerfLogger.TEZ_RUN_VERTEX + s)) {
+ perfLogger.PerfLogBegin(TezJobMonitor.CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
+ }
+
+ /* vertex is started, but not complete */
+ if (failed > 0) {
+ reportBuffer.append(
+ String.format("%s: %d(+%d,-%d)/%d\t", s, complete, running, failed, total));
+ } else {
+ reportBuffer.append(String.format("%s: %d(+%d)/%d\t", s, complete, running, total));
+ }
+ } else {
+ /* vertex is waiting for input/slots or complete */
+ if (failed > 0) {
+ /* tasks finished but some failed */
+ reportBuffer.append(String.format("%s: %d(-%d)/%d\t", s, complete, failed, total));
+ } else {
+ reportBuffer.append(String.format("%s: %d/%d\t", s, complete, total));
+ }
+ }
+ }
+ }
+
+ return reportBuffer.toString();
+ }
+
+ abstract void renderProgress(ProgressMonitor progressMonitor);
+
+ abstract void renderReport(String report);
+ }
+
+ /**
+ * This adds the required progress update to the session state, which HS2 uses to send the
+ * same information to the beeline client when requested.
+ */
+ static class LogToFileFunction extends BaseUpdateFunction {
+
+ LogToFileFunction(TezJobMonitor monitor) {
+ super(monitor);
+ }
+
+ @Override
+ public void renderProgress(ProgressMonitor progressMonitor) {
+ SessionState.get().updateProgressMonitor(progressMonitor);
+ }
+
+ @Override
+ public void renderReport(String report) {
+ monitor.console.printInfo(report);
+ }
+ }
+
+ /**
+ * This is used when we want the progress update to be printed in the same process, typically via
+ * hive-cli mode.
+ */
+ static class InPlaceUpdateFunction extends BaseUpdateFunction {
+ /**
+ * Have to use the same instance to render, else the number of lines printed earlier is lost and the
+ * screen will print the table again and again.
+ */
+ private final InPlaceUpdate inPlaceUpdate;
+
+ InPlaceUpdateFunction(TezJobMonitor monitor) {
+ super(monitor);
+ inPlaceUpdate = new InPlaceUpdate(SessionState.LogHelper.getInfoStream());
+ }
+
+ @Override
+ public void renderProgress(ProgressMonitor progressMonitor) {
+ inPlaceUpdate.render(progressMonitor);
+ }
+
+ @Override
+ public void renderReport(String report) {
+ monitor.console.logInfo(report);
+ }
+ }
+}
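
RenderStrategy hides the two rendering paths behind one UpdateFunction, so a caller selects a strategy once and then feeds it every status tick. A minimal usage sketch, assuming a TezJobMonitor instance and an already-computed canRenderInPlace flag (the actual selection logic appears in the TezJobMonitor diff below):

  RenderStrategy.UpdateFunction fn = canRenderInPlace
      ? new RenderStrategy.InPlaceUpdateFunction(monitor)  // hive-cli style in-place redraw
      : new RenderStrategy.LogToFileFunction(monitor);     // HS2: session state + log lines
  fn.update(status, vertexProgressMap);                    // invoked on each DAG status poll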
http://git-wip-us.apache.org/repos/asf/hive/blob/e17a0409/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
index c0a068d..f2f97f3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/monitoring/TezJobMonitor.java
@@ -47,8 +47,6 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.SortedSet;
-import java.util.TreeSet;
import static org.apache.tez.dag.api.client.DAGStatus.State.RUNNING;
@@ -59,24 +57,18 @@ import static org.apache.tez.dag.api.client.DAGStatus.State.RUNNING;
*/
public class TezJobMonitor {
- private static final String CLASS_NAME = TezJobMonitor.class.getName();
+ static final String CLASS_NAME = TezJobMonitor.class.getName();
private static final int CHECK_INTERVAL = 200;
private static final int MAX_RETRY_INTERVAL = 2500;
- private static final int PRINT_INTERVAL = 3000;
private final PerfLogger perfLogger = SessionState.getPerfLogger();
private static final List<DAGClient> shutdownList;
private final Map<String, BaseWork> workMap;
- private transient LogHelper console;
+ transient LogHelper console;
- private long lastPrintTime;
private StringWriter diagnostics = new StringWriter();
- interface UpdateFunction {
- void update(DAGStatus status, Map<String, Progress> vertexProgressMap, String report);
- }
-
static {
shutdownList = new LinkedList<>();
ShutdownHookManager.addShutdownHook(new Runnable() {
@@ -102,12 +94,7 @@ public class TezJobMonitor {
private final DAG dag;
private final Context context;
private long executionStartTime = 0;
- private final UpdateFunction updateFunction;
- /**
- * Have to use the same instance to render else the number lines printed earlier is lost and the
- * screen will print the table again and again.
- */
- private final InPlaceUpdate inPlaceUpdate;
+ private final RenderStrategy.UpdateFunction updateFunction;
public TezJobMonitor(Map<String, BaseWork> workMap, final DAGClient dagClient, HiveConf conf, DAG dag,
Context ctx) {
@@ -117,29 +104,15 @@ public class TezJobMonitor {
this.dag = dag;
this.context = ctx;
console = SessionState.getConsole();
- inPlaceUpdate = new InPlaceUpdate(LogHelper.getInfoStream());
updateFunction = updateFunction();
}
- private UpdateFunction updateFunction() {
- UpdateFunction logToFileFunction = new UpdateFunction() {
- @Override
- public void update(DAGStatus status, Map<String, Progress> vertexProgressMap, String report) {
- SessionState.get().updateProgressMonitor(progressMonitor(status, vertexProgressMap));
- console.printInfo(report);
- }
- };
- UpdateFunction inPlaceUpdateFunction = new UpdateFunction() {
- @Override
- public void update(DAGStatus status, Map<String, Progress> vertexProgressMap, String report) {
- inPlaceUpdate.render(progressMonitor(status, vertexProgressMap));
- console.logInfo(report);
- }
- };
+ private RenderStrategy.UpdateFunction updateFunction() {
return InPlaceUpdate.canRenderInPlace(hiveConf)
&& !SessionState.getConsole().getIsSilent()
&& !SessionState.get().isHiveServerQuery()
- ? inPlaceUpdateFunction : logToFileFunction;
+ ? new RenderStrategy.InPlaceUpdateFunction(this)
+ : new RenderStrategy.LogToFileFunction(this);
}
private boolean isProfilingEnabled() {
@@ -163,7 +136,6 @@ public class TezJobMonitor {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
DAGStatus.State lastState = null;
- String lastReport = null;
boolean running = false;
while (true) {
@@ -195,13 +167,13 @@ public class TezJobMonitor {
this.executionStartTime = System.currentTimeMillis();
running = true;
}
- lastReport = updateStatus(status, vertexProgressMap, lastReport);
+ updateFunction.update(status, vertexProgressMap);
break;
case SUCCEEDED:
if (!running) {
this.executionStartTime = monitorStartTime;
}
- lastReport = updateStatus(status, vertexProgressMap, lastReport);
+ updateFunction.update(status, vertexProgressMap);
success = true;
running = false;
done = true;
@@ -210,7 +182,7 @@ public class TezJobMonitor {
if (!running) {
this.executionStartTime = monitorStartTime;
}
- lastReport = updateStatus(status, vertexProgressMap, lastReport);
+ updateFunction.update(status, vertexProgressMap);
console.printInfo("Status: Killed");
running = false;
done = true;
@@ -221,7 +193,7 @@ public class TezJobMonitor {
if (!running) {
this.executionStartTime = monitorStartTime;
}
- lastReport = updateStatus(status, vertexProgressMap, lastReport);
+ updateFunction.update(status, vertexProgressMap);
console.printError("Status: Failed");
running = false;
done = true;
@@ -323,71 +295,11 @@ public class TezJobMonitor {
return (tezCounter == null) ? 0 : tezCounter.getValue();
}
- private String updateStatus(DAGStatus status, Map<String, Progress> vertexProgressMap,
- String lastReport) {
- String report = getReport(vertexProgressMap);
- if (!report.equals(lastReport) || System.currentTimeMillis() >= lastPrintTime + PRINT_INTERVAL) {
- updateFunction.update(status, vertexProgressMap, report);
- lastPrintTime = System.currentTimeMillis();
- }
- return report;
- }
-
- private String getReport(Map<String, Progress> progressMap) {
- StringBuilder reportBuffer = new StringBuilder();
-
- SortedSet<String> keys = new TreeSet<String>(progressMap.keySet());
- for (String s : keys) {
- Progress progress = progressMap.get(s);
- final int complete = progress.getSucceededTaskCount();
- final int total = progress.getTotalTaskCount();
- final int running = progress.getRunningTaskCount();
- final int failed = progress.getFailedTaskAttemptCount();
- if (total <= 0) {
- reportBuffer.append(String.format("%s: -/-\t", s));
- } else {
- if (complete == total) {
- /*
- * We may have missed the start of the vertex due to the 3 seconds interval
- */
- if (!perfLogger.startTimeHasMethod(PerfLogger.TEZ_RUN_VERTEX + s)) {
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
- }
-
- perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
- }
- if (complete < total && (complete > 0 || running > 0 || failed > 0)) {
-
- if (!perfLogger.startTimeHasMethod(PerfLogger.TEZ_RUN_VERTEX + s)) {
- perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
- }
-
- /* vertex is started, but not complete */
- if (failed > 0) {
- reportBuffer.append(String.format("%s: %d(+%d,-%d)/%d\t", s, complete, running, failed, total));
- } else {
- reportBuffer.append(String.format("%s: %d(+%d)/%d\t", s, complete, running, total));
- }
- } else {
- /* vertex is waiting for input/slots or complete */
- if (failed > 0) {
- /* tasks finished but some failed */
- reportBuffer.append(String.format("%s: %d(-%d)/%d\t", s, complete, failed, total));
- } else {
- reportBuffer.append(String.format("%s: %d/%d\t", s, complete, total));
- }
- }
- }
- }
-
- return reportBuffer.toString();
- }
-
public String getDiagnostics() {
return diagnostics.toString();
}
- private ProgressMonitor progressMonitor(DAGStatus status, Map<String, Progress> progressMap) {
+ ProgressMonitor progressMonitor(DAGStatus status, Map<String, Progress> progressMap) {
try {
return new TezProgressMonitor(dagClient, status, workMap, progressMap, console,
executionStartTime);
[25/50] [abbrv] hive git commit: HIVE-15992: LLAP: NPE in
LlapTaskCommunicator.getCompletedLogsUrl for unsuccessful attempt (Rajesh
Balamohan reviewed by Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-15992: LLAP: NPE in LlapTaskCommunicator.getCompletedLogsUrl for unsuccessful attempt (Rajesh Balamohan reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7fa8e37f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7fa8e37f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7fa8e37f
Branch: refs/heads/hive-14535
Commit: 7fa8e37fd13d9d6a4a4a5b2c72ce02d7c2d199ef
Parents: bda64ee
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Tue Feb 21 10:24:28 2017 -0800
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Tue Feb 21 10:24:28 2017 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7fa8e37f/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
----------------------------------------------------------------------
diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
index 893b7d9..3aae7a4 100644
--- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
+++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
@@ -541,7 +541,7 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl {
@Override
public String getInProgressLogsUrl(TezTaskAttemptID attemptID, NodeId containerNodeId) {
String url = "";
- if (timelineServerUri != null) {
+ if (timelineServerUri != null && containerNodeId != null) {
LlapNodeId llapNodeId = LlapNodeId.getInstance(containerNodeId.getHost(), containerNodeId.getPort());
BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(llapNodeId);
ContainerId containerId = biMap.inverse().get(attemptID);
@@ -559,7 +559,7 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl {
@Override
public String getCompletedLogsUrl(TezTaskAttemptID attemptID, NodeId containerNodeId) {
String url = "";
- if (timelineServerUri != null) {
+ if (timelineServerUri != null && containerNodeId != null) {
LlapNodeId llapNodeId = LlapNodeId.getInstance(containerNodeId.getHost(), containerNodeId.getPort());
BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(llapNodeId);
ContainerId containerId = biMap.inverse().get(attemptID);
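The containerNodeId guard covers the unsuccessful-attempt case from the JIRA title: an attempt that never received a container has no node to resolve, so both accessors now skip URL construction and fall through to the empty-string default instead of dereferencing null.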
[38/50] [abbrv] hive git commit: HIVE-16002 : Correlated IN subquery
with aggregate asserts in sq_count_check UDF (Vineet Garg via Ashutosh
Chauhan)
Posted by se...@apache.org.
HIVE-16002 : Correlated IN subquery with aggregate asserts in sq_count_check UDF (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/89310fee
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/89310fee
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/89310fee
Branch: refs/heads/hive-14535
Commit: 89310fee3ab7fa8cf4347850aaa03a57e10e78ba
Parents: ede8a55
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Wed Feb 22 18:15:40 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Feb 22 18:16:52 2017 -0800
----------------------------------------------------------------------
.../ql/udf/generic/GenericUDFSQCountCheck.java | 11 ++++--
.../clientnegative/subquery_corr_in_agg.q | 8 ++++
.../clientnegative/subquery_corr_in_agg.q.out | 39 ++++++++++++++++++++
3 files changed, 54 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/89310fee/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java
index 89fa0de..f5d9f82 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFSQCountCheck.java
@@ -61,11 +61,8 @@ public class GenericUDFSQCountCheck extends GenericUDF {
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
- Object valObject = arguments[0].get();
- assert(valObject != null);
Long val = getLongValue(arguments, 0, converters);
- assert(val >= 0);
switch (arguments.length){
case 1: //Scalar queries, should expect value/count less than 1
@@ -75,7 +72,13 @@ public class GenericUDFSQCountCheck extends GenericUDF {
}
break;
case 2:
- if (val == 0) { // IN/NOT IN subqueries with aggregate
+ Object valObject = arguments[0].get();
+ if( valObject != null
+ && getLongValue(arguments, 0, converters) == 0){
+ throw new UDFArgumentException(
+ " IN/NOT IN subquery with aggregate returning zero result. Currently this is not supported.");
+ }
+ else if(valObject == null) {
throw new UDFArgumentException(
" IN/NOT IN subquery with aggregate returning zero result. Currently this is not supported.");
}
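
Both new branches throw the identical UDFArgumentException, so the guard collapses to a single condition; a functionally equivalent sketch (not the committed form):

  Object valObject = arguments[0].get();
  // a null value and a zero count both mean the correlated aggregate found nothing usable
  if (valObject == null || getLongValue(arguments, 0, converters) == 0) {
    throw new UDFArgumentException(
        " IN/NOT IN subquery with aggregate returning zero result. Currently this is not supported.");
  }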
http://git-wip-us.apache.org/repos/asf/hive/blob/89310fee/ql/src/test/queries/clientnegative/subquery_corr_in_agg.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_in_agg.q b/ql/src/test/queries/clientnegative/subquery_corr_in_agg.q
new file mode 100644
index 0000000..f677fba
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_in_agg.q
@@ -0,0 +1,8 @@
+create table Part1 (PNum int, OrderOnHand int);
+insert into Part1 values (3,6),(10,1),(8,0);
+create table Supply (PNum int, Qty int);
+insert into Supply values (3,4),(3,2),(10,1);
+
+
+select pnum from Part1 p where OrderOnHand in
+ (select count(*) from Supply s where s.pnum = p.pnum);
http://git-wip-us.apache.org/repos/asf/hive/blob/89310fee/ql/src/test/results/clientnegative/subquery_corr_in_agg.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_in_agg.q.out b/ql/src/test/results/clientnegative/subquery_corr_in_agg.q.out
new file mode 100644
index 0000000..36019cb
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_corr_in_agg.q.out
@@ -0,0 +1,39 @@
+PREHOOK: query: create table Part1 (PNum int, OrderOnHand int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@Part1
+POSTHOOK: query: create table Part1 (PNum int, OrderOnHand int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Part1
+PREHOOK: query: insert into Part1 values (3,6),(10,1),(8,0)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@part1
+POSTHOOK: query: insert into Part1 values (3,6),(10,1),(8,0)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@part1
+POSTHOOK: Lineage: part1.orderonhand EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: part1.pnum EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: create table Supply (PNum int, Qty int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@Supply
+POSTHOOK: query: create table Supply (PNum int, Qty int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Supply
+PREHOOK: query: insert into Supply values (3,4),(3,2),(10,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@supply
+POSTHOOK: query: insert into Supply values (3,4),(3,2),(10,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@supply
+POSTHOOK: Lineage: supply.pnum EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: supply.qty EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select pnum from Part1 p where OrderOnHand in
+ (select count(*) from Supply s where s.pnum = p.pnum)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part1
+PREHOOK: Input: default@supply
+#### A masked pattern was here ####
+FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
[09/50] [abbrv] hive git commit: HIVE-15915: Emit progress percentage
in getting operation status (Jimmy Xiang, reviewed by Xuefu Zhang)
Posted by se...@apache.org.
HIVE-15915: Emit progress percentage in getting operation status (Jimmy Xiang, reviewed by Xuefu Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1677ed95
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1677ed95
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1677ed95
Branch: refs/heads/hive-14535
Commit: 1677ed956edb660c02b0d15253d486e2b0b626ba
Parents: bb4d8db
Author: Jimmy Xiang <jx...@apache.org>
Authored: Tue Feb 14 10:27:14 2017 -0800
Committer: Jimmy Xiang <jx...@apache.org>
Committed: Fri Feb 17 09:48:37 2017 -0800
----------------------------------------------------------------------
.../hive/ql/exec/mr/HadoopJobExecHelper.java | 6 +++-
.../ql/exec/spark/status/SparkJobMonitor.java | 12 +++++++
.../hadoop/hive/ql/session/SessionState.java | 36 +++++++++++++++++++-
.../service/cli/thrift/ThriftCLIService.java | 24 +++++++++++--
4 files changed, 74 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1677ed95/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
index 41887d7..3c07197 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
@@ -92,6 +92,11 @@ public class HadoopJobExecHelper {
reduceProgress = reduceProgress == 100 ? (int)Math.floor(rj.reduceProgress() * 100) : reduceProgress;
task.taskCounters.put("CNTR_NAME_" + task.getId() + "_MAP_PROGRESS", Long.valueOf(mapProgress));
task.taskCounters.put("CNTR_NAME_" + task.getId() + "_REDUCE_PROGRESS", Long.valueOf(reduceProgress));
+
+ if (SessionState.get() != null) {
+ final float progress = (rj.mapProgress() + rj.reduceProgress()) * 0.5f;
+ SessionState.get().updateProgressedPercentage(progress);
+ }
}
/**
@@ -196,7 +201,6 @@ public class HadoopJobExecHelper {
}
}
- @SuppressWarnings("deprecation")
public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) {
if (ctrs == null) {
// hadoop might return null if it cannot locate the job.
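The percentage handed to the session is the unweighted mean of the two MR phases: with rj.mapProgress() = 1.0f and rj.reduceProgress() = 0.4f, (1.0f + 0.4f) * 0.5f = 0.7f, i.e. the operation reports 70% even though the reduce phase has barely started.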
http://git-wip-us.apache.org/repos/asf/hive/blob/1677ed95/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
index cf0162d..0b224f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/status/SparkJobMonitor.java
@@ -179,6 +179,10 @@ abstract class SparkJobMonitor {
String currentDate = dt.format(new Date());
reportBuffer.append(currentDate + "\t");
+ // Num of total and completed tasks
+ int sumTotal = 0;
+ int sumComplete = 0;
+
SortedSet<String> keys = new TreeSet<String>(progressMap.keySet());
for (String s : keys) {
SparkStageProgress progress = progressMap.get(s);
@@ -186,6 +190,9 @@ abstract class SparkJobMonitor {
final int total = progress.getTotalTaskCount();
final int running = progress.getRunningTaskCount();
final int failed = progress.getFailedTaskCount();
+ sumTotal += total;
+ sumComplete += complete;
+
String stageName = "Stage-" + s;
if (total <= 0) {
reportBuffer.append(String.format("%s: -/-\t", stageName));
@@ -230,6 +237,11 @@ abstract class SparkJobMonitor {
}
}
}
+
+ if (SessionState.get() != null) {
+ final float progress = (sumTotal == 0) ? 1.0f : (float) sumComplete / (float) sumTotal;
+ SessionState.get().updateProgressedPercentage(progress);
+ }
return reportBuffer.toString();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1677ed95/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 3e01e92..ba2c9c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.log.ProgressMonitor;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.ObjectStore;
@@ -69,7 +70,6 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory;
import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.common.log.ProgressMonitor;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
@@ -1748,6 +1748,40 @@ public class SessionState {
return StringUtils.join(preReloadableAuxJars, ',');
}
+ public void updateProgressedPercentage(final double percentage) {
+ this.progressMonitor = new ProgressMonitor() {
+ @Override
+ public List<String> headers() {
+ return null;
+ }
+
+ @Override
+ public List<List<String>> rows() {
+ return null;
+ }
+
+ @Override
+ public String footerSummary() {
+ return null;
+ }
+
+ @Override
+ public long startTime() {
+ return 0;
+ }
+
+ @Override
+ public String executionStatus() {
+ return null;
+ }
+
+ @Override
+ public double progressedPercentage() {
+ return percentage;
+ }
+ };
+ }
+
public void updateProgressMonitor(ProgressMonitor progressMonitor) {
this.progressMonitor = progressMonitor;
}
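
updateProgressedPercentage wraps a bare fraction in a throwaway ProgressMonitor so the MR and Spark paths can reuse the session-level hook that Tez already populates; only progressedPercentage() carries data, and every other accessor returns a null or zero placeholder. A minimal caller sketch, mirroring the HadoopJobExecHelper hunk above:

  SessionState ss = SessionState.get();
  if (ss != null) {
    ss.updateProgressedPercentage(0.7);  // e.g. the MR map/reduce mean computed earlier
  }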
http://git-wip-us.apache.org/repos/asf/hive/blob/1677ed95/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
index e09d9fe..211b33b 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java
@@ -18,6 +18,8 @@
package org.apache.hive.service.cli.thrift;
+import static com.google.common.base.Preconditions.checkArgument;
+
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
@@ -27,9 +29,11 @@ import java.util.concurrent.TimeUnit;
import javax.security.auth.login.LoginException;
+import org.apache.hadoop.hive.common.ServerUtils;
+import org.apache.hadoop.hive.common.log.ProgressMonitor;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.common.ServerUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hive.service.AbstractService;
@@ -46,11 +50,13 @@ import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.JobProgressUpdate;
import org.apache.hive.service.cli.OperationHandle;
import org.apache.hive.service.cli.OperationStatus;
+import org.apache.hive.service.cli.OperationType;
import org.apache.hive.service.cli.ProgressMonitorStatusMapper;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.SessionHandle;
import org.apache.hive.service.cli.TableSchema;
import org.apache.hive.service.cli.TezProgressMonitorStatusMapper;
+import org.apache.hive.service.cli.operation.Operation;
import org.apache.hive.service.cli.session.SessionManager;
import org.apache.hive.service.rpc.thrift.TCLIService;
import org.apache.hive.service.rpc.thrift.TCancelDelegationTokenReq;
@@ -91,6 +97,7 @@ import org.apache.hive.service.rpc.thrift.TGetTablesReq;
import org.apache.hive.service.rpc.thrift.TGetTablesResp;
import org.apache.hive.service.rpc.thrift.TGetTypeInfoReq;
import org.apache.hive.service.rpc.thrift.TGetTypeInfoResp;
+import org.apache.hive.service.rpc.thrift.TJobExecutionStatus;
import org.apache.hive.service.rpc.thrift.TOpenSessionReq;
import org.apache.hive.service.rpc.thrift.TOpenSessionResp;
import org.apache.hive.service.rpc.thrift.TProgressUpdateResp;
@@ -431,6 +438,13 @@ public abstract class ThriftCLIService extends AbstractService implements TCLISe
return sessionHandle;
}
+ private double getProgressedPercentage(OperationHandle opHandle) throws HiveSQLException {
+ checkArgument(OperationType.EXECUTE_STATEMENT.equals(opHandle.getOperationType()));
+ Operation operation = cliService.getSessionManager().getOperationManager().getOperation(opHandle);
+ SessionState state = operation.getParentSession().getSessionState();
+ ProgressMonitor monitor = state.getProgressMonitor();
+ return monitor == null ? 0.0 : monitor.progressedPercentage();
+ }
private String getDelegationToken(String userName)
throws HiveSQLException, LoginException, IOException {
@@ -646,11 +660,13 @@ public abstract class ThriftCLIService extends AbstractService implements TCLISe
mapper = new TezProgressMonitorStatusMapper();
}
+ TJobExecutionStatus executionStatus =
+ mapper.forStatus(progressUpdate.status);
resp.setProgressUpdateResponse(new TProgressUpdateResp(
progressUpdate.headers(),
progressUpdate.rows(),
progressUpdate.progressedPercentage,
- mapper.forStatus(progressUpdate.status),
+ executionStatus,
progressUpdate.footerSummary,
progressUpdate.startTimeMillis
));
@@ -659,6 +675,10 @@ public abstract class ThriftCLIService extends AbstractService implements TCLISe
resp.setErrorCode(opException.getErrorCode());
resp.setErrorMessage(org.apache.hadoop.util.StringUtils.
stringifyException(opException));
+ } else if (executionStatus == TJobExecutionStatus.NOT_AVAILABLE
+ && OperationType.EXECUTE_STATEMENT.equals(operationHandle.getOperationType())) {
+ resp.getProgressUpdateResponse().setProgressedPercentage(
+ getProgressedPercentage(operationHandle));
}
resp.setStatus(OK_STATUS);
} catch (Exception e) {
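The net effect on the Thrift side: when the engine-specific mapper reports TJobExecutionStatus.NOT_AVAILABLE for an EXECUTE_STATEMENT operation, the response is patched with the session-level percentage, so MR and Spark progress (which bypass the Tez progress-update path) still reaches JDBC/Beeline clients.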
[17/50] [abbrv] hive git commit: HIVE-15846 : Relocate more
dependencies (e.g. org.apache.zookeeper) for JDBC uber jar (Tao Li via Thejas
Nair)
Posted by se...@apache.org.
HIVE-15846 : Relocate more dependencies (e.g. org.apache.zookeeper) for JDBC uber jar (Tao Li via Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/56f6c9dd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/56f6c9dd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/56f6c9dd
Branch: refs/heads/hive-14535
Commit: 56f6c9dd56e839a57dc93f7ae3c27866d62eb9fc
Parents: 1a6902c
Author: Tao Li <tl...@hortonworks.com>
Authored: Sat Feb 18 21:12:33 2017 -0800
Committer: Thejas M Nair <th...@hortonworks.com>
Committed: Sat Feb 18 21:12:38 2017 -0800
----------------------------------------------------------------------
jdbc/pom.xml | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/56f6c9dd/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/jdbc/pom.xml b/jdbc/pom.xml
index b68f207..25430c6 100644
--- a/jdbc/pom.xml
+++ b/jdbc/pom.xml
@@ -173,19 +173,18 @@
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
+ <exclude>core-default.xml</exclude>
</excludes>
</filter>
</filters>
<artifactSet>
<excludes>
<exclude>org.apache.commons:commons-compress</exclude>
- <exclude>commons-configuration:commons-configuration</exclude>
<exclude>org.apache.hadoop:hadoop-yarn*</exclude>
<exclude>org.apache.hadoop:hadoop-mapreduce*</exclude>
<exclude>org.apache.hadoop:hadoop-hdfs</exclude>
<exclude>org.apache.hadoop:hadoop-client</exclude>
<exclude>org.apache.hadoop:hadoop-annotations</exclude>
- <exclude>org.apache.hadoop:hadoop-auth</exclude>
<exclude>org.apache.hive:hive-vector-code-gen</exclude>
<exclude>org.apache.ant:*</exclude>
<exclude>junit:*</exclude>
@@ -197,6 +196,7 @@
<exclude>org.eclipse.jetty.aggregate:*</exclude>
<exclude>org.tukaani:*</exclude>
<exclude>io.airlift:*</exclude>
+ <exclude>io.dropwizard.metrics:*</exclude>
<exclude>org.apache.velocity:*</exclude>
<exclude>net.sf.jpam:*</exclude>
<exclude>org.apache.avro:*</exclude>
@@ -228,6 +228,7 @@
<exclude>com.sun.xml.bind:*</exclude>
<exclude>com.thoughtworks.paranamer:*</exclude>
<exclude>com.twitter:*</exclude>
+ <exclude>com.zaxxer:*</exclude>
<exclude>io.netty:*</exclude>
<exclude>javax.activation:*</exclude>
<exclude>javax.inject:*</exclude>
@@ -236,8 +237,7 @@
<exclude>javax.servlet:*</exclude>
<exclude>javax.servlet.jsp:*</exclude>
<exclude>javax.transaction:*</exclude>
- <exclude>javax.xml.bind:*</exclude>
- <exclude>javax.xml.stream:*</exclude>
+ <exclude>javax.xml.*:*</exclude>
<exclude>jline:*</exclude>
<exclude>joda-time:*</exclude>
<exclude>net.java.dev.jets3t:*</exclude>
@@ -246,6 +246,8 @@
<exclude>org.apache.directory.api:*</exclude>
<exclude>org.apache.directory.server:*</exclude>
<exclude>org.apache.geronimo.specs:*</exclude>
+ <exclude>org.apache.htrace:*</exclude>
+ <exclude>org.apache.slider:*</exclude>
<exclude>org.codehaus.jackson:*</exclude>
<exclude>org.codehaus.jettison:*</exclude>
<exclude>org.datanucleus:*</exclude>
@@ -253,8 +255,11 @@
<exclude>org.htrace:*</exclude>
<exclude>org.mortbay.jetty:*</exclude>
<exclude>org.xerial.snappy:*</exclude>
+ <exclude>org.json:*</exclude>
<exclude>tomcat:*</exclude>
<exclude>xmlenc:*</exclude>
+ <exclude>xerces:*</exclude>
+ <exclude>xml-apis:*</exclude>
</excludes>
</artifactSet>
<relocations>
@@ -291,6 +296,14 @@
<shadedPattern>org.apache.hive.org.apache.hadoop</shadedPattern>
</relocation>
<relocation>
+ <pattern>org.apache.zookeeper</pattern>
+ <shadedPattern>org.apache.hive.org.apache.zookeeper</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>org.apache.curator</pattern>
+ <shadedPattern>org.apache.hive.org.apache.curator</shadedPattern>
+ </relocation>
+ <relocation>
<pattern>com.beust</pattern>
<shadedPattern>org.apache.hive.com.beust</shadedPattern>
</relocation>
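A hedged sketch, not part of the patch: once the relocations above are applied by maven-shade-plugin, the JDBC uber jar carries ZooKeeper and Curator under the org.apache.hive prefix. With the standalone jar on the classpath, the shaded name below should resolve; the class name assumes the relocation rule from the diff verbatim.

public class RelocationCheck {
  public static void main(String[] args) throws ClassNotFoundException {
    // The original org.apache.zookeeper package is rewritten at shade time,
    // so only the relocated name is present inside the uber jar.
    Class<?> zk = Class.forName("org.apache.hive.org.apache.zookeeper.ZooKeeper");
    System.out.println("shaded ZooKeeper resolved as " + zk.getName());
  }
}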
[24/50] [abbrv] hive git commit: HIVE-15910: Improvements in Hive Unit Test by using In-memory Derby DB (Sankar Hariappan via Thejas Nair, Wei Zheng)
Posted by se...@apache.org.
HIVE-15910: Improvements in Hive Unit Test by using In-memory Derby DB (Sankar Hariappan via Thejas Nair, Wei Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bda64ee8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bda64ee8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bda64ee8
Branch: refs/heads/hive-14535
Commit: bda64ee87c74a06b3cf19b08c41d67f192f22018
Parents: 016afe0
Author: Sankar Hariappan <ma...@gmail.com>
Authored: Mon Feb 20 22:51:29 2017 -0800
Committer: Thejas M Nair <th...@hortonworks.com>
Committed: Mon Feb 20 22:51:34 2017 -0800
----------------------------------------------------------------------
data/conf/hive-site.xml | 2 +-
.../hadoop/hive/ql/txn/compactor/CompactorTest.java | 11 +++++++----
.../apache/hadoop/hive/ql/txn/compactor/TestCleaner.java | 6 ++++++
.../hadoop/hive/ql/txn/compactor/TestInitiator.java | 5 +++++
.../apache/hadoop/hive/ql/txn/compactor/TestWorker.java | 6 ++++++
5 files changed, 25 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bda64ee8/data/conf/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 7a69711..62364fe 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -70,7 +70,7 @@
<property>
<name>javax.jdo.option.ConnectionURL</name>
- <value>jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true</value>
+ <value>jdbc:derby:memory:${test.tmp.dir}/junit_metastore_db;create=true</value>
</property>
<property>
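A small sketch of what the new connection URL buys: with jdbc:derby:memory: the metastore database lives entirely in the JVM heap, so each test run starts from a clean slate and leaves no junit_metastore_db directory behind. This assumes derby.jar on the classpath; the database name is illustrative.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class InMemoryDerbyDemo {
  public static void main(String[] args) throws SQLException {
    // create=true builds the database on first connect, in memory only.
    String url = "jdbc:derby:memory:junit_metastore_db;create=true";
    try (Connection conn = DriverManager.getConnection(url)) {
      System.out.println("in-memory metastore DB created: " + !conn.isClosed());
    }
  }
}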
http://git-wip-us.apache.org/repos/asf/hive/blob/bda64ee8/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
index 2d1ecb5..bbed591 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.txn.compactor;
+import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -66,6 +67,7 @@ import org.slf4j.LoggerFactory;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -96,10 +98,11 @@ public abstract class CompactorTest {
TxnDbUtil.cleanDb();
ms = new HiveMetaStoreClient(conf);
txnHandler = TxnUtils.getTxnStore(conf);
- tmpdir = new File(System.getProperty("java.io.tmpdir") +
- System.getProperty("file.separator") + "compactor_test_tables");
- tmpdir.mkdir();
- tmpdir.deleteOnExit();
+ tmpdir = new File (Files.createTempDirectory("compactor_test_table_").toString());
+ }
+
+ protected void compactorTestCleanup() throws IOException {
+ FileUtils.deleteDirectory(tmpdir);
}
protected void startInitiator() throws Exception {
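The change above swaps a fixed, shared temp path for a unique directory per run and adds an explicit recursive cleanup. A sketch of that setup/teardown pattern follows; the class and method names are illustrative, not the Hive test classes, and commons-io is assumed on the classpath as in the patch.

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import org.apache.commons.io.FileUtils;

public class TempDirLifecycleDemo {
  private File tmpdir;

  void setUp() throws IOException {
    // Unique directory per run avoids collisions between concurrent tests.
    tmpdir = Files.createTempDirectory("compactor_test_table_").toFile();
  }

  void tearDown() throws IOException {
    // Recursive delete; the old deleteOnExit() cannot remove non-empty dirs.
    FileUtils.deleteDirectory(tmpdir);
  }

  public static void main(String[] args) throws IOException {
    TempDirLifecycleDemo demo = new TempDirLifecycleDemo();
    demo.setUp();
    System.out.println("created " + demo.tmpdir);
    demo.tearDown();
    System.out.println("cleaned up: " + !demo.tmpdir.exists());
  }
}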
http://git-wip-us.apache.org/repos/asf/hive/blob/bda64ee8/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
index 44dd99b..0acf71b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.UnlockRequest;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
@@ -517,4 +518,9 @@ public class TestCleaner extends CompactorTest {
boolean useHive130DeltaDirName() {
return false;
}
+
+ @After
+ public void tearDown() throws Exception {
+ compactorTestCleanup();
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/bda64ee8/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java
index a11fe86..f75a1be 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiator.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
@@ -752,4 +753,8 @@ public class TestInitiator extends CompactorTest {
return false;
}
+ @After
+ public void tearDown() throws Exception {
+ compactorTestCleanup();
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/bda64ee8/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java
index e85f49c..efd6ed8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
@@ -977,4 +978,9 @@ public class TestWorker extends CompactorTest {
Assert.assertEquals(1, compacts.size());
Assert.assertTrue(TxnStore.SUCCEEDED_RESPONSE.equals(rsp.getCompacts().get(0).getState()));
}
+
+ @After
+ public void tearDown() throws Exception {
+ compactorTestCleanup();
+ }
}
[49/50] [abbrv] hive git commit: HIVE-14671: merge master into hive-14535 (Sergey Shelukhin)
Posted by se...@apache.org.
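The merged test output below exercises insert-only ("micromanaged") transactional tables. A hedged sketch of issuing the same kind of DDL/DML over Hive JDBC; the connection URL is illustrative and hive-jdbc is assumed on the classpath, so this is not part of the merge itself.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class InsertOnlyTableDemo {
  public static void main(String[] args) throws Exception {
    // Point the URL at a real HiveServer2 instance before running.
    try (Connection conn = DriverManager.getConnection(
             "jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      // Insert-only transactional table, as created in the q.out below.
      stmt.execute("create table simple_mm(key int) stored as orc "
          + "tblproperties ('transactional'='true', "
          + "'transactional_properties'='insert_only')");
      stmt.execute("insert into table simple_mm values (42)");
    }
  }
}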
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/test/results/clientpositive/mm_all.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/mm_all.q.out
index 4944d6c,0000000..4a9e4e5
mode 100644,000000..100644
--- a/ql/src/test/results/clientpositive/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/mm_all.q.out
@@@ -1,3163 -1,0 +1,3163 @@@
+PREHOOK: query: drop table intermediate
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table intermediate
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@intermediate
+POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@intermediate
+PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@intermediate@p=455
+POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@intermediate@p=455
+POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@intermediate@p=456
+POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@intermediate@p=456
+POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@intermediate@p=457
+POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@intermediate@p=457
+POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: drop table part_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table part_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_mm
+POSTHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_mm
+PREHOOK: query: explain insert into table part_mm partition(key_mm=455) select key from intermediate
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert into table part_mm partition(key_mm=455) select key from intermediate
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: intermediate
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.part_mm
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ key_mm 455
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.part_mm
+ micromanaged table: true
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-5
+ Merge File Operator
+ Map Operator Tree:
+ ORC File Merge Operator
+ merge level: stripe
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@part_mm@key_mm=455
+POSTHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@part_mm@key_mm=455
+POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: insert into table part_mm partition(key_mm=456) select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@part_mm@key_mm=456
+POSTHOOK: query: insert into table part_mm partition(key_mm=456) select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@part_mm@key_mm=456
+POSTHOOK: Lineage: part_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@part_mm@key_mm=455
+POSTHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@part_mm@key_mm=455
+POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from part_mm order by key, key_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_mm
+PREHOOK: Input: default@part_mm@key_mm=455
+PREHOOK: Input: default@part_mm@key_mm=456
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_mm order by key, key_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_mm
+POSTHOOK: Input: default@part_mm@key_mm=455
+POSTHOOK: Input: default@part_mm@key_mm=456
+#### A masked pattern was here ####
+0 455
+0 455
+0 456
+10 455
+10 455
+10 456
+97 455
+97 455
+97 456
+98 455
+98 455
+98 456
+100 455
+100 455
+100 456
+103 455
+103 455
+103 456
+PREHOOK: query: select * from part_mm order by key, key_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_mm
+PREHOOK: Input: default@part_mm@key_mm=455
+PREHOOK: Input: default@part_mm@key_mm=456
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_mm order by key, key_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_mm
+POSTHOOK: Input: default@part_mm@key_mm=455
+POSTHOOK: Input: default@part_mm@key_mm=456
+#### A masked pattern was here ####
+0 455
+0 455
+0 456
+10 455
+10 455
+10 456
+97 455
+97 455
+97 456
+98 455
+98 455
+98 456
+100 455
+100 455
+100 456
+103 455
+103 455
+103 456
+PREHOOK: query: truncate table part_mm
+PREHOOK: type: TRUNCATETABLE
+PREHOOK: Output: default@part_mm@key_mm=455
+PREHOOK: Output: default@part_mm@key_mm=456
+POSTHOOK: query: truncate table part_mm
+POSTHOOK: type: TRUNCATETABLE
+POSTHOOK: Output: default@part_mm@key_mm=455
+POSTHOOK: Output: default@part_mm@key_mm=456
+PREHOOK: query: select * from part_mm order by key, key_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_mm
+PREHOOK: Input: default@part_mm@key_mm=455
+PREHOOK: Input: default@part_mm@key_mm=456
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_mm order by key, key_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_mm
+POSTHOOK: Input: default@part_mm@key_mm=455
+POSTHOOK: Input: default@part_mm@key_mm=456
+#### A masked pattern was here ####
+PREHOOK: query: drop table part_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part_mm
+PREHOOK: Output: default@part_mm
+POSTHOOK: query: drop table part_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part_mm
+POSTHOOK: Output: default@part_mm
+PREHOOK: query: drop table simple_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table simple_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@simple_mm
+POSTHOOK: query: create table simple_mm(key int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@simple_mm
+PREHOOK: query: insert into table simple_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_mm
+POSTHOOK: query: insert into table simple_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_mm
+POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: insert overwrite table simple_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_mm
+POSTHOOK: query: insert overwrite table simple_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_mm
+POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from simple_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_mm
+#### A masked pattern was here ####
+0
+10
+97
+98
+100
+103
+PREHOOK: query: insert into table simple_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_mm
+POSTHOOK: query: insert into table simple_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_mm
+POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from simple_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_mm
+#### A masked pattern was here ####
+0
+0
+10
+10
+97
+97
+98
+98
+100
+100
+103
+103
+PREHOOK: query: truncate table simple_mm
+PREHOOK: type: TRUNCATETABLE
+PREHOOK: Output: default@simple_mm
+POSTHOOK: query: truncate table simple_mm
+POSTHOOK: type: TRUNCATETABLE
+POSTHOOK: Output: default@simple_mm
+PREHOOK: query: select * from simple_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_mm
+#### A masked pattern was here ####
+PREHOOK: query: drop table simple_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@simple_mm
+PREHOOK: Output: default@simple_mm
+POSTHOOK: query: drop table simple_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@simple_mm
+POSTHOOK: Output: default@simple_mm
+PREHOOK: query: drop table dp_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table dp_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc
+ tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dp_mm
+POSTHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc
+ tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dp_mm
+PREHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@dp_mm@key1=123
+POSTHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@dp_mm@key1=123/key2=0
+POSTHOOK: Output: default@dp_mm@key1=123/key2=10
+POSTHOOK: Output: default@dp_mm@key1=123/key2=100
+POSTHOOK: Output: default@dp_mm@key1=123/key2=103
+POSTHOOK: Output: default@dp_mm@key1=123/key2=97
+POSTHOOK: Output: default@dp_mm@key1=123/key2=98
+POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=0).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=100).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=103).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=10).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=97).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=98).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from dp_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dp_mm
+PREHOOK: Input: default@dp_mm@key1=123/key2=0
+PREHOOK: Input: default@dp_mm@key1=123/key2=10
+PREHOOK: Input: default@dp_mm@key1=123/key2=100
+PREHOOK: Input: default@dp_mm@key1=123/key2=103
+PREHOOK: Input: default@dp_mm@key1=123/key2=97
+PREHOOK: Input: default@dp_mm@key1=123/key2=98
+#### A masked pattern was here ####
+POSTHOOK: query: select * from dp_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dp_mm
+POSTHOOK: Input: default@dp_mm@key1=123/key2=0
+POSTHOOK: Input: default@dp_mm@key1=123/key2=10
+POSTHOOK: Input: default@dp_mm@key1=123/key2=100
+POSTHOOK: Input: default@dp_mm@key1=123/key2=103
+POSTHOOK: Input: default@dp_mm@key1=123/key2=97
+POSTHOOK: Input: default@dp_mm@key1=123/key2=98
+#### A masked pattern was here ####
+0 123 0
+10 123 10
+97 123 97
+98 123 98
+100 123 100
+103 123 103
+PREHOOK: query: drop table dp_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dp_mm
+PREHOOK: Output: default@dp_mm
+POSTHOOK: query: drop table dp_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dp_mm
+POSTHOOK: Output: default@dp_mm
+PREHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_mm
+POSTHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_mm
+PREHOOK: query: insert into table union_mm
+select temps.p from (
+select key as p from intermediate
+union all
+select key + 1 as p from intermediate ) temps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@union_mm
+POSTHOOK: query: insert into table union_mm
+select temps.p from (
+select key as p from intermediate
+union all
+select key + 1 as p from intermediate ) temps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@union_mm
+POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from union_mm order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from union_mm order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_mm
+#### A masked pattern was here ####
+0
+1
+10
+11
+97
+98
+98
+99
+100
+101
+103
+104
+PREHOOK: query: insert into table union_mm
+select p from
+(
+select key + 1 as p from intermediate
+union all
+select key from intermediate
+) tab group by p
+union all
+select key + 2 as p from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@union_mm
+POSTHOOK: query: insert into table union_mm
+select p from
+(
+select key + 1 as p from intermediate
+union all
+select key from intermediate
+) tab group by p
+union all
+select key + 2 as p from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@union_mm
+POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from union_mm order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from union_mm order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_mm
+#### A masked pattern was here ####
+0
+0
+1
+1
+2
+10
+10
+11
+11
+12
+97
+97
+98
+98
+98
+99
+99
+99
+100
+100
+100
+101
+101
+102
+103
+103
+104
+104
+105
+PREHOOK: query: insert into table union_mm
+SELECT p FROM
+(
+ SELECT key + 1 as p FROM intermediate
+ UNION ALL
+ SELECT key as p FROM (
+ SELECT distinct key FROM (
+ SELECT key FROM (
+ SELECT key + 2 as key FROM intermediate
+ UNION ALL
+ SELECT key FROM intermediate
+ )t1
+ group by key)t2
+ )t3
+)t4
+group by p
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@union_mm
+POSTHOOK: query: insert into table union_mm
+SELECT p FROM
+(
+ SELECT key + 1 as p FROM intermediate
+ UNION ALL
+ SELECT key as p FROM (
+ SELECT distinct key FROM (
+ SELECT key FROM (
+ SELECT key + 2 as key FROM intermediate
+ UNION ALL
+ SELECT key FROM intermediate
+ )t1
+ group by key)t2
+ )t3
+)t4
+group by p
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@union_mm
+POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from union_mm order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from union_mm order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_mm
+#### A masked pattern was here ####
+0
+0
+0
+1
+1
+1
+2
+2
+10
+10
+10
+11
+11
+11
+12
+12
+97
+97
+97
+98
+98
+98
+98
+99
+99
+99
+99
+100
+100
+100
+100
+101
+101
+101
+102
+102
+103
+103
+103
+104
+104
+104
+105
+105
+PREHOOK: query: drop table union_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_mm
+PREHOOK: Output: default@union_mm
+POSTHOOK: query: drop table union_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_mm
+POSTHOOK: Output: default@union_mm
+PREHOOK: query: create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partunion_mm
+POSTHOOK: query: create table partunion_mm(id int) partitioned by (key int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partunion_mm
+PREHOOK: query: insert into table partunion_mm partition(key)
+select temps.* from (
+select key as p, key from intermediate
+union all
+select key + 1 as p, key + 1 from intermediate ) temps
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@partunion_mm
+POSTHOOK: query: insert into table partunion_mm partition(key)
+select temps.* from (
+select key as p, key from intermediate
+union all
+select key + 1 as p, key + 1 from intermediate ) temps
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@partunion_mm@key=0
+POSTHOOK: Output: default@partunion_mm@key=1
+POSTHOOK: Output: default@partunion_mm@key=10
+POSTHOOK: Output: default@partunion_mm@key=100
+POSTHOOK: Output: default@partunion_mm@key=101
+POSTHOOK: Output: default@partunion_mm@key=103
+POSTHOOK: Output: default@partunion_mm@key=104
+POSTHOOK: Output: default@partunion_mm@key=11
+POSTHOOK: Output: default@partunion_mm@key=97
+POSTHOOK: Output: default@partunion_mm@key=98
+POSTHOOK: Output: default@partunion_mm@key=99
+POSTHOOK: Lineage: partunion_mm PARTITION(key=0).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=100).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=101).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=103).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=104).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=10).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=11).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=97).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=98).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: partunion_mm PARTITION(key=99).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from partunion_mm order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partunion_mm
+PREHOOK: Input: default@partunion_mm@key=0
+PREHOOK: Input: default@partunion_mm@key=1
+PREHOOK: Input: default@partunion_mm@key=10
+PREHOOK: Input: default@partunion_mm@key=100
+PREHOOK: Input: default@partunion_mm@key=101
+PREHOOK: Input: default@partunion_mm@key=103
+PREHOOK: Input: default@partunion_mm@key=104
+PREHOOK: Input: default@partunion_mm@key=11
+PREHOOK: Input: default@partunion_mm@key=97
+PREHOOK: Input: default@partunion_mm@key=98
+PREHOOK: Input: default@partunion_mm@key=99
+#### A masked pattern was here ####
+POSTHOOK: query: select * from partunion_mm order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partunion_mm
+POSTHOOK: Input: default@partunion_mm@key=0
+POSTHOOK: Input: default@partunion_mm@key=1
+POSTHOOK: Input: default@partunion_mm@key=10
+POSTHOOK: Input: default@partunion_mm@key=100
+POSTHOOK: Input: default@partunion_mm@key=101
+POSTHOOK: Input: default@partunion_mm@key=103
+POSTHOOK: Input: default@partunion_mm@key=104
+POSTHOOK: Input: default@partunion_mm@key=11
+POSTHOOK: Input: default@partunion_mm@key=97
+POSTHOOK: Input: default@partunion_mm@key=98
+POSTHOOK: Input: default@partunion_mm@key=99
+#### A masked pattern was here ####
+0 0
+1 1
+10 10
+11 11
+97 97
+98 98
+98 98
+99 99
+100 100
+101 101
+103 103
+104 104
+PREHOOK: query: drop table partunion_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partunion_mm
+PREHOOK: Output: default@partunion_mm
+POSTHOOK: query: drop table partunion_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partunion_mm
+POSTHOOK: Output: default@partunion_mm
+PREHOOK: query: create table skew_mm(k1 int, k2 int, k4 int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3))
+ stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skew_mm
+POSTHOOK: query: create table skew_mm(k1 int, k2 int, k4 int) skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3))
+ stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skew_mm
+PREHOOK: query: insert into table skew_mm
+select key, key, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@skew_mm
+POSTHOOK: query: insert into table skew_mm
+select key, key, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@skew_mm
+POSTHOOK: Lineage: skew_mm.k1 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_mm.k2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_mm.k4 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from skew_mm order by k2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skew_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from skew_mm order by k2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skew_mm
+#### A masked pattern was here ####
+0 0 0
+10 10 10
+97 97 97
+98 98 98
+100 100 100
+103 103 103
+PREHOOK: query: drop table skew_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skew_mm
+PREHOOK: Output: default@skew_mm
+POSTHOOK: query: drop table skew_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skew_mm
+POSTHOOK: Output: default@skew_mm
+PREHOOK: query: create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int)
+skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skew_dp_union_mm
+POSTHOOK: query: create table skew_dp_union_mm(k1 int, k2 int, k4 int) partitioned by (k3 int)
+skewed by (k1, k4) on ((0,0),(1,1),(2,2),(3,3)) stored as directories tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skew_dp_union_mm
+PREHOOK: query: insert into table skew_dp_union_mm partition (k3)
+select key as i, key as j, key as k, key as l from intermediate
+union all
+select key +1 as i, key +2 as j, key +3 as k, key +4 as l from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@skew_dp_union_mm
+POSTHOOK: query: insert into table skew_dp_union_mm partition (k3)
+select key as i, key as j, key as k, key as l from intermediate
+union all
+select key +1 as i, key +2 as j, key +3 as k, key +4 as l from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@skew_dp_union_mm@k3=0
+POSTHOOK: Output: default@skew_dp_union_mm@k3=10
+POSTHOOK: Output: default@skew_dp_union_mm@k3=100
+POSTHOOK: Output: default@skew_dp_union_mm@k3=101
+POSTHOOK: Output: default@skew_dp_union_mm@k3=102
+POSTHOOK: Output: default@skew_dp_union_mm@k3=103
+POSTHOOK: Output: default@skew_dp_union_mm@k3=104
+POSTHOOK: Output: default@skew_dp_union_mm@k3=107
+POSTHOOK: Output: default@skew_dp_union_mm@k3=14
+POSTHOOK: Output: default@skew_dp_union_mm@k3=4
+POSTHOOK: Output: default@skew_dp_union_mm@k3=97
+POSTHOOK: Output: default@skew_dp_union_mm@k3=98
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=14).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=14).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=14).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=4).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=4).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=4).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=97).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=97).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=97).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=98).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=98).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=98).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from skew_dp_union_mm order by k2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skew_dp_union_mm
+PREHOOK: Input: default@skew_dp_union_mm@k3=0
+PREHOOK: Input: default@skew_dp_union_mm@k3=10
+PREHOOK: Input: default@skew_dp_union_mm@k3=100
+PREHOOK: Input: default@skew_dp_union_mm@k3=101
+PREHOOK: Input: default@skew_dp_union_mm@k3=102
+PREHOOK: Input: default@skew_dp_union_mm@k3=103
+PREHOOK: Input: default@skew_dp_union_mm@k3=104
+PREHOOK: Input: default@skew_dp_union_mm@k3=107
+PREHOOK: Input: default@skew_dp_union_mm@k3=14
+PREHOOK: Input: default@skew_dp_union_mm@k3=4
+PREHOOK: Input: default@skew_dp_union_mm@k3=97
+PREHOOK: Input: default@skew_dp_union_mm@k3=98
+#### A masked pattern was here ####
+POSTHOOK: query: select * from skew_dp_union_mm order by k2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skew_dp_union_mm
+POSTHOOK: Input: default@skew_dp_union_mm@k3=0
+POSTHOOK: Input: default@skew_dp_union_mm@k3=10
+POSTHOOK: Input: default@skew_dp_union_mm@k3=100
+POSTHOOK: Input: default@skew_dp_union_mm@k3=101
+POSTHOOK: Input: default@skew_dp_union_mm@k3=102
+POSTHOOK: Input: default@skew_dp_union_mm@k3=103
+POSTHOOK: Input: default@skew_dp_union_mm@k3=104
+POSTHOOK: Input: default@skew_dp_union_mm@k3=107
+POSTHOOK: Input: default@skew_dp_union_mm@k3=14
+POSTHOOK: Input: default@skew_dp_union_mm@k3=4
+POSTHOOK: Input: default@skew_dp_union_mm@k3=97
+POSTHOOK: Input: default@skew_dp_union_mm@k3=98
+#### A masked pattern was here ####
+0 0 0 0
+1 2 3 4
+10 10 10 10
+11 12 13 14
+97 97 97 97
+98 98 98 98
+98 99 100 101
+99 100 101 102
+100 100 100 100
+101 102 103 104
+103 103 103 103
+104 105 106 107
+PREHOOK: query: drop table skew_dp_union_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skew_dp_union_mm
+PREHOOK: Output: default@skew_dp_union_mm
+POSTHOOK: query: drop table skew_dp_union_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skew_dp_union_mm
+POSTHOOK: Output: default@skew_dp_union_mm
+PREHOOK: query: create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: create table merge0_mm (id int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@merge0_mm
+PREHOOK: query: insert into table merge0_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: insert into table merge0_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@merge0_mm
+POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from merge0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge0_mm
+#### A masked pattern was here ####
+98
+97
+0
+10
+100
+103
+PREHOOK: query: insert into table merge0_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: insert into table merge0_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@merge0_mm
+POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from merge0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge0_mm
+#### A masked pattern was here ####
+98
+97
+0
+10
+100
+103
+98
+97
+0
+10
+100
+103
+PREHOOK: query: drop table merge0_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@merge0_mm
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: drop table merge0_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@merge0_mm
+POSTHOOK: Output: default@merge0_mm
+PREHOOK: query: create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@merge2_mm
+POSTHOOK: query: create table merge2_mm (id int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@merge2_mm
+PREHOOK: query: insert into table merge2_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@merge2_mm
+POSTHOOK: query: insert into table merge2_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@merge2_mm
+POSTHOOK: Lineage: merge2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge2_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from merge2_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge2_mm
+#### A masked pattern was here ####
+98
+97
+0
+10
+100
+103
+PREHOOK: query: insert into table merge2_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@merge2_mm
+POSTHOOK: query: insert into table merge2_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@merge2_mm
+POSTHOOK: Lineage: merge2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge2_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from merge2_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge2_mm
+#### A masked pattern was here ####
+98
+97
+0
+10
+100
+103
+98
+97
+0
+10
+100
+103
+PREHOOK: query: drop table merge2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@merge2_mm
+PREHOOK: Output: default@merge2_mm
+POSTHOOK: query: drop table merge2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@merge2_mm
+POSTHOOK: Output: default@merge2_mm
+PREHOOK: query: create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@merge1_mm
+POSTHOOK: query: create table merge1_mm (id int) partitioned by (key int) stored as orc tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@merge1_mm
+PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@merge1_mm
+POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@merge1_mm@key=0
+POSTHOOK: Output: default@merge1_mm@key=10
+POSTHOOK: Output: default@merge1_mm@key=100
+POSTHOOK: Output: default@merge1_mm@key=103
+POSTHOOK: Output: default@merge1_mm@key=97
+POSTHOOK: Output: default@merge1_mm@key=98
+POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge1_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge1_mm
+PREHOOK: Input: default@merge1_mm@key=0
+PREHOOK: Input: default@merge1_mm@key=10
+PREHOOK: Input: default@merge1_mm@key=100
+PREHOOK: Input: default@merge1_mm@key=103
+PREHOOK: Input: default@merge1_mm@key=97
+PREHOOK: Input: default@merge1_mm@key=98
+#### A masked pattern was here ####
+POSTHOOK: query: select * from merge1_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge1_mm
+POSTHOOK: Input: default@merge1_mm@key=0
+POSTHOOK: Input: default@merge1_mm@key=10
+POSTHOOK: Input: default@merge1_mm@key=100
+POSTHOOK: Input: default@merge1_mm@key=103
+POSTHOOK: Input: default@merge1_mm@key=97
+POSTHOOK: Input: default@merge1_mm@key=98
+#### A masked pattern was here ####
+100 100
+103 103
+98 98
+97 97
+10 10
+0 0
+PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@merge1_mm
+POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@merge1_mm@key=0
+POSTHOOK: Output: default@merge1_mm@key=10
+POSTHOOK: Output: default@merge1_mm@key=100
+POSTHOOK: Output: default@merge1_mm@key=103
+POSTHOOK: Output: default@merge1_mm@key=97
+POSTHOOK: Output: default@merge1_mm@key=98
+POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge1_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge1_mm
+PREHOOK: Input: default@merge1_mm@key=0
+PREHOOK: Input: default@merge1_mm@key=10
+PREHOOK: Input: default@merge1_mm@key=100
+PREHOOK: Input: default@merge1_mm@key=103
+PREHOOK: Input: default@merge1_mm@key=97
+PREHOOK: Input: default@merge1_mm@key=98
+#### A masked pattern was here ####
+POSTHOOK: query: select * from merge1_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge1_mm
+POSTHOOK: Input: default@merge1_mm@key=0
+POSTHOOK: Input: default@merge1_mm@key=10
+POSTHOOK: Input: default@merge1_mm@key=100
+POSTHOOK: Input: default@merge1_mm@key=103
+POSTHOOK: Input: default@merge1_mm@key=97
+POSTHOOK: Input: default@merge1_mm@key=98
+#### A masked pattern was here ####
+103 103
+100 100
+103 103
+97 97
+100 100
+97 97
+98 98
+98 98
+10 10
+0 0
+10 10
+0 0
+PREHOOK: query: drop table merge1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@merge1_mm
+PREHOOK: Output: default@merge1_mm
+POSTHOOK: query: drop table merge1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@merge1_mm
+POSTHOOK: Output: default@merge1_mm
+PREHOOK: query: drop table ctas0_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table ctas0_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas0_mm
+POSTHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas0_mm
+POSTHOOK: Lineage: ctas0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: ctas0_mm.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+PREHOOK: query: select * from ctas0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from ctas0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas0_mm
+#### A masked pattern was here ####
+98 455
+97 455
+0 456
+10 456
+100 457
+103 457
+PREHOOK: query: drop table ctas0_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@ctas0_mm
+PREHOOK: Output: default@ctas0_mm
+POSTHOOK: query: drop table ctas0_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@ctas0_mm
+POSTHOOK: Output: default@ctas0_mm
+PREHOOK: query: drop table ctas1_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table ctas1_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as
+ select * from intermediate union all select * from intermediate
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas1_mm
+POSTHOOK: query: create table ctas1_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as
+ select * from intermediate union all select * from intermediate
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas1_mm
+POSTHOOK: Lineage: ctas1_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: ctas1_mm.p EXPRESSION [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+PREHOOK: query: select * from ctas1_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas1_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from ctas1_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas1_mm
+#### A masked pattern was here ####
+98 455
+98 455
+97 455
+97 455
+0 456
+0 456
+10 456
+10 456
+100 457
+100 457
+103 457
+103 457
+PREHOOK: query: drop table ctas1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@ctas1_mm
+PREHOOK: Output: default@ctas1_mm
+POSTHOOK: query: drop table ctas1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@ctas1_mm
+POSTHOOK: Output: default@ctas1_mm
+PREHOOK: query: drop table iow0_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table iow0_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table iow0_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@iow0_mm
+POSTHOOK: query: create table iow0_mm(key int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@iow0_mm
+PREHOOK: query: insert overwrite table iow0_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow0_mm
+POSTHOOK: query: insert overwrite table iow0_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow0_mm
+POSTHOOK: Lineage: iow0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: insert into table iow0_mm select key + 1 from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow0_mm
+POSTHOOK: query: insert into table iow0_mm select key + 1 from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow0_mm
+POSTHOOK: Lineage: iow0_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from iow0_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@iow0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from iow0_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@iow0_mm
+#### A masked pattern was here ####
+0
+1
+10
+11
+97
+98
+98
+99
+100
+101
+103
+104
+PREHOOK: query: insert overwrite table iow0_mm select key + 2 from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow0_mm
+POSTHOOK: query: insert overwrite table iow0_mm select key + 2 from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow0_mm
+POSTHOOK: Lineage: iow0_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from iow0_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@iow0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from iow0_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@iow0_mm
+#### A masked pattern was here ####
+2
+12
+99
+100
+102
+105
+PREHOOK: query: drop table iow0_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@iow0_mm
+PREHOOK: Output: default@iow0_mm
+POSTHOOK: query: drop table iow0_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@iow0_mm
+POSTHOOK: Output: default@iow0_mm
+PREHOOK: query: drop table iow1_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table iow1_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table iow1_mm(key int) partitioned by (key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@iow1_mm
+POSTHOOK: query: create table iow1_mm(key int) partitioned by (key2 int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@iow1_mm
+PREHOOK: query: insert overwrite table iow1_mm partition (key2)
+select key as k1, key from intermediate union all select key as k1, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow1_mm
+POSTHOOK: query: insert overwrite table iow1_mm partition (key2)
+select key as k1, key from intermediate union all select key as k1, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow1_mm@key2=0
+POSTHOOK: Output: default@iow1_mm@key2=10
+POSTHOOK: Output: default@iow1_mm@key2=100
+POSTHOOK: Output: default@iow1_mm@key2=103
+POSTHOOK: Output: default@iow1_mm@key2=97
+POSTHOOK: Output: default@iow1_mm@key2=98
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: insert into table iow1_mm partition (key2)
+select key + 1 as k1, key from intermediate union all select key as k1, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow1_mm
+POSTHOOK: query: insert into table iow1_mm partition (key2)
+select key + 1 as k1, key from intermediate union all select key as k1, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow1_mm@key2=0
+POSTHOOK: Output: default@iow1_mm@key2=10
+POSTHOOK: Output: default@iow1_mm@key2=100
+POSTHOOK: Output: default@iow1_mm@key2=103
+POSTHOOK: Output: default@iow1_mm@key2=97
+POSTHOOK: Output: default@iow1_mm@key2=98
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from iow1_mm order by key, key2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@iow1_mm
+PREHOOK: Input: default@iow1_mm@key2=0
+PREHOOK: Input: default@iow1_mm@key2=10
+PREHOOK: Input: default@iow1_mm@key2=100
+PREHOOK: Input: default@iow1_mm@key2=103
+PREHOOK: Input: default@iow1_mm@key2=97
+PREHOOK: Input: default@iow1_mm@key2=98
+#### A masked pattern was here ####
+POSTHOOK: query: select * from iow1_mm order by key, key2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@iow1_mm
+POSTHOOK: Input: default@iow1_mm@key2=0
+POSTHOOK: Input: default@iow1_mm@key2=10
+POSTHOOK: Input: default@iow1_mm@key2=100
+POSTHOOK: Input: default@iow1_mm@key2=103
+POSTHOOK: Input: default@iow1_mm@key2=97
+POSTHOOK: Input: default@iow1_mm@key2=98
+#### A masked pattern was here ####
+0 0
+0 0
+0 0
+1 0
+10 10
+10 10
+10 10
+11 10
+97 97
+97 97
+97 97
+98 97
+98 98
+98 98
+98 98
+99 98
+100 100
+100 100
+100 100
+101 100
+103 103
+103 103
+103 103
+104 103
+PREHOOK: query: insert overwrite table iow1_mm partition (key2)
+select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow1_mm
+POSTHOOK: query: insert overwrite table iow1_mm partition (key2)
+select key + 3 as k1, key from intermediate union all select key + 4 as k1, key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow1_mm@key2=0
+POSTHOOK: Output: default@iow1_mm@key2=10
+POSTHOOK: Output: default@iow1_mm@key2=100
+POSTHOOK: Output: default@iow1_mm@key2=103
+POSTHOOK: Output: default@iow1_mm@key2=97
+POSTHOOK: Output: default@iow1_mm@key2=98
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=0).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=10).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from iow1_mm order by key, key2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@iow1_mm
+PREHOOK: Input: default@iow1_mm@key2=0
+PREHOOK: Input: default@iow1_mm@key2=10
+PREHOOK: Input: default@iow1_mm@key2=100
+PREHOOK: Input: default@iow1_mm@key2=103
+PREHOOK: Input: default@iow1_mm@key2=97
+PREHOOK: Input: default@iow1_mm@key2=98
+#### A masked pattern was here ####
+POSTHOOK: query: select * from iow1_mm order by key, key2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@iow1_mm
+POSTHOOK: Input: default@iow1_mm@key2=0
+POSTHOOK: Input: default@iow1_mm@key2=10
+POSTHOOK: Input: default@iow1_mm@key2=100
+POSTHOOK: Input: default@iow1_mm@key2=103
+POSTHOOK: Input: default@iow1_mm@key2=97
+POSTHOOK: Input: default@iow1_mm@key2=98
+#### A masked pattern was here ####
+3 0
+4 0
+13 10
+14 10
+100 97
+101 97
+101 98
+102 98
+103 100
+104 100
+106 103
+107 103
+PREHOOK: query: insert overwrite table iow1_mm partition (key2)
+select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@iow1_mm
+POSTHOOK: query: insert overwrite table iow1_mm partition (key2)
+select key + 3 as k1, key + 3 from intermediate union all select key + 2 as k1, key + 2 from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@iow1_mm@key2=100
+POSTHOOK: Output: default@iow1_mm@key2=101
+POSTHOOK: Output: default@iow1_mm@key2=102
+POSTHOOK: Output: default@iow1_mm@key2=103
+POSTHOOK: Output: default@iow1_mm@key2=105
+POSTHOOK: Output: default@iow1_mm@key2=106
+POSTHOOK: Output: default@iow1_mm@key2=12
+POSTHOOK: Output: default@iow1_mm@key2=13
+POSTHOOK: Output: default@iow1_mm@key2=2
+POSTHOOK: Output: default@iow1_mm@key2=3
+POSTHOOK: Output: default@iow1_mm@key2=99
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=100).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=101).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=102).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=103).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=105).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=106).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=12).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=13).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=2).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=3).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: iow1_mm PARTITION(key2=99).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from iow1_mm order by key, key2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@iow1_mm
+PREHOOK: Input: default@iow1_mm@key2=0
+PREHOOK: Input: default@iow1_mm@key2=10
+PREHOOK: Input: default@iow1_mm@key2=100
+PREHOOK: Input: default@iow1_mm@key2=101
+PREHOOK: Input: default@iow1_mm@key2=102
+PREHOOK: Input: default@iow1_mm@key2=103
+PREHOOK: Input: default@iow1_mm@key2=105
+PREHOOK: Input: default@iow1_mm@key2=106
+PREHOOK: Input: default@iow1_mm@key2=12
+PREHOOK: Input: default@iow1_mm@key2=13
+PREHOOK: Input: default@iow1_mm@key2=2
+PREHOOK: Input: default@iow1_mm@key2=3
+PREHOOK: Input: default@iow1_mm@key2=97
+PREHOOK: Input: default@iow1_mm@key2=98
+PREHOOK: Input: default@iow1_mm@key2=99
+#### A masked pattern was here ####
+POSTHOOK: query: select * from iow1_mm order by key, key2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@iow1_mm
+POSTHOOK: Input: default@iow1_mm@key2=0
+POSTHOOK: Input: default@iow1_mm@key2=10
+POSTHOOK: Input: default@iow1_mm@key2=100
+POSTHOOK: Input: default@iow1_mm@key2=101
+POSTHOOK: Input: default@iow1_mm@key2=102
+POSTHOOK: Input: default@iow1_mm@key2=103
+POSTHOOK: Input: default@iow1_mm@key2=105
+POSTHOOK: Input: default@iow1_mm@key2=106
+POSTHOOK: Input: default@iow1_mm@key2=12
+POSTHOOK: Input: default@iow1_mm@key2=13
+POSTHOOK: Input: default@iow1_mm@key2=2
+POSTHOOK: Input: default@iow1_mm@key2=3
+POSTHOOK: Input: default@iow1_mm@key2=97
+POSTHOOK: Input: default@iow1_mm@key2=98
+POSTHOOK: Input: default@iow1_mm@key2=99
+#### A masked pattern was here ####
+2 2
+3 0
+3 3
+4 0
+12 12
+13 10
+13 13
+14 10
+99 99
+100 97
+100 100
+100 100
+101 97
+101 98
+101 101
+102 98
+102 102
+103 103
+105 105
+106 106
+PREHOOK: query: drop table iow1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@iow1_mm
+PREHOOK: Output: default@iow1_mm
+POSTHOOK: query: drop table iow1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@iow1_mm
+POSTHOOK: Output: default@iow1_mm
+PREHOOK: query: drop table load0_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table load0_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@load0_mm
+POSTHOOK: query: create table load0_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@load0_mm
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_mm
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_mm
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table load0_mm
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_mm
+PREHOOK: query: select count(1) from load0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_mm
+#### A masked pattern was here ####
+500
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_mm
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_mm
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table load0_mm
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_mm
+PREHOOK: query: select count(1) from load0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_mm
+#### A masked pattern was here ####
+1000
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load0_mm
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' overwrite into table load0_mm
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load0_mm
+PREHOOK: query: select count(1) from load0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load0_mm
+#### A masked pattern was here ####
+500
+PREHOOK: query: drop table load0_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@load0_mm
+PREHOOK: Output: default@load0_mm
+POSTHOOK: query: drop table load0_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@load0_mm
+POSTHOOK: Output: default@load0_mm
+PREHOOK: query: drop table intermediate2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table intermediate2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table intermediate2 (key string, value string) stored as textfile
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: create table intermediate2 (key string, value string) stored as textfile
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: drop table load1_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table load1_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@load1_mm
+POSTHOOK: query: create table load1_mm (key string, value string) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@load1_mm
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load1_mm
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load1_mm
+PREHOOK: query: select count(1) from load1_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load1_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load1_mm
+#### A masked pattern was here ####
+1000
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load1_mm
+PREHOOK: query: select count(1) from load1_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load1_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load1_mm
+#### A masked pattern was here ####
+1050
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load1_mm
+PREHOOK: query: select count(1) from load1_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load1_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load1_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load1_mm
+#### A masked pattern was here ####
+500
+PREHOOK: query: drop table load1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@load1_mm
+PREHOOK: Output: default@load1_mm
+POSTHOOK: query: drop table load1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@load1_mm
+POSTHOOK: Output: default@load1_mm
+PREHOOK: query: drop table load2_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table load2_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table load2_mm (key string, value string)
+ partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@load2_mm
+POSTHOOK: query: create table load2_mm (key string, value string)
+ partitioned by (k int, l int) stored as textfile tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@load2_mm
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv2.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: load data local inpath '../../data/files/kv3.txt' into table intermediate2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@intermediate2
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@load2_mm
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@load2_mm
+POSTHOOK: Output: default@load2_mm@k=5/l=5
+PREHOOK: query: select count(1) from load2_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@load2_mm
+PREHOOK: Input: default@load2_mm@k=5/l=5
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from load2_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@load2_mm
+POSTHOOK: Input: default@load2_mm@k=5/l=5
+#### A masked pattern was here ####
+1025
+PREHOOK: query: drop table load2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@load2_mm
+PREHOOK: Output: default@load2_mm
+POSTHOOK: query: drop table load2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@load2_mm
+POSTHOOK: Output: default@load2_mm
+PREHOOK: query: drop table intermediate2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@intermediate2
+PREHOOK: Output: default@intermediate2
+POSTHOOK: query: drop table intermediate2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@intermediate2
+POSTHOOK: Output: default@intermediate2
+PREHOOK: query: drop table intermediate_nonpart
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table intermediate_nonpart
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table intermmediate_part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table intermmediate_part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table intermmediate_nonpart
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table intermmediate_nonpart
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table intermediate_nonpart(key int, p int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@intermediate_nonpart
+POSTHOOK: query: create table intermediate_nonpart(key int, p int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@intermediate_nonpart
+PREHOOK: query: insert into intermediate_nonpart select * from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@intermediate_nonpart
+POSTHOOK: query: insert into intermediate_nonpart select * from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@intermediate_nonpart
+POSTHOOK: Lineage: intermediate_nonpart.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: intermediate_nonpart.p SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+PREHOOK: query: create table intermmediate_nonpart(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@intermmediate_nonpart
+POSTHOOK: query: create table intermmediate_nonpart(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@intermmediate_nonpart
+PREHOOK: query: insert into intermmediate_nonpart select * from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@intermmediate_nonpart
+POSTHOOK: query: insert into intermmediate_nonpart select * from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POS
<TRUNCATED>
[18/50] [abbrv] hive git commit: HIVE-15904: select query throwing
Null Pointer Exception from
org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization.generateSemiJoinOperatorPlan
(Jason Dere, reviewed by Gunther Hagleitner)
Posted by se...@apache.org.
HIVE-15904: select query throwing Null Pointer Exception from org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization.generateSemiJoinOperatorPlan (Jason Dere, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0debf9f2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0debf9f2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0debf9f2
Branch: refs/heads/hive-14535
Commit: 0debf9f2916b2ed115e1cdb392a595ae7cf0c761
Parents: 56f6c9d
Author: Jason Dere <jd...@hortonworks.com>
Authored: Sun Feb 19 15:58:25 2017 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Sun Feb 19 15:58:25 2017 -0800
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../DynamicPartitionPruningOptimization.java | 36 ++-
.../dynamic_semijoin_reduction_2.q | 41 +++
.../llap/dynamic_semijoin_reduction_2.q.out | 301 +++++++++++++++++++
4 files changed, 365 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0debf9f2/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 7c54275..4a69bcc 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -482,6 +482,7 @@ minillaplocal.query.files=acid_globallimit.q,\
disable_merge_for_bucketing.q,\
dynamic_partition_pruning.q,\
dynamic_semijoin_reduction.q,\
+ dynamic_semijoin_reduction_2.q,\
dynpart_sort_opt_vectorization.q,\
dynpart_sort_optimization.q,\
dynpart_sort_optimization_acid.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/0debf9f2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index c8691e8..8692c45 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -398,25 +398,33 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
String internalColName = null;
ExprNodeDesc exprNodeDesc = key;
// Find the ExprNodeColumnDesc
- while (!(exprNodeDesc instanceof ExprNodeColumnDesc)) {
+ while (!(exprNodeDesc instanceof ExprNodeColumnDesc) &&
+ (exprNodeDesc.getChildren() != null)) {
exprNodeDesc = exprNodeDesc.getChildren().get(0);
}
- internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn();
- ExprNodeColumnDesc colExpr = ((ExprNodeColumnDesc)(parentOfRS.
- getColumnExprMap().get(internalColName)));
- String colName = ExprNodeDescUtils.extractColName(colExpr);
+ if (exprNodeDesc instanceof ExprNodeColumnDesc) {
+ internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn();
- // Fetch the TableScan Operator.
- Operator<?> op = parentOfRS.getParentOperators().get(0);
- while (op != null && !(op instanceof TableScanOperator)) {
- op = op.getParentOperators().get(0);
- }
- assert op != null;
+ ExprNodeColumnDesc colExpr = ((ExprNodeColumnDesc) (parentOfRS.
+ getColumnExprMap().get(internalColName)));
+ String colName = ExprNodeDescUtils.extractColName(colExpr);
+
+ // Fetch the TableScan Operator.
+ Operator<?> op = parentOfRS.getParentOperators().get(0);
+ while (op != null && !(op instanceof TableScanOperator)) {
+ op = op.getParentOperators().get(0);
+ }
+ assert op != null;
- Table table = ((TableScanOperator) op).getConf().getTableMetadata();
- if (table.isPartitionKey(colName)) {
- // The column is partition column, skip the optimization.
+ Table table = ((TableScanOperator) op).getConf().getTableMetadata();
+ if (table.isPartitionKey(colName)) {
+ // The column is partition column, skip the optimization.
+ return false;
+ }
+ } else {
+ // No column found!
+ // Bail out
return false;
}
}
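For readers skimming the hunk above: the NPE arose because the old loop walked down the semijoin key expression assuming every node has children, which fails when the key bottoms out in a constant (as with the COALESCE(-92, -994) key in the test added below). The following is a minimal, self-contained sketch of the guarded descent the patch introduces; ExprNode and ColumnNode here are simplified hypothetical stand-ins, not Hive's actual ExprNodeDesc/ExprNodeColumnDesc classes.

import java.util.Collections;
import java.util.List;

class ExprNode {
    private final List<ExprNode> children;
    ExprNode(List<ExprNode> children) { this.children = children; }
    List<ExprNode> getChildren() { return children; }
}

class ColumnNode extends ExprNode {
    private final String column;
    ColumnNode(String column) {
        super(Collections.<ExprNode>emptyList());
        this.column = column;
    }
    String getColumn() { return column; }
}

public class SemiJoinKeyCheck {
    // Returns the first column name reachable via child 0, or null when the
    // expression bottoms out in a non-column node such as a constant. The
    // pre-HIVE-15904 loop assumed every node had children, so a constant
    // key dereferenced a null child list and threw an NPE.
    static String firstColumnOrNull(ExprNode expr) {
        while (!(expr instanceof ColumnNode)
                && expr.getChildren() != null && !expr.getChildren().isEmpty()) {
            expr = expr.getChildren().get(0);
        }
        return (expr instanceof ColumnNode) ? ((ColumnNode) expr).getColumn() : null;
    }

    public static void main(String[] args) {
        // Stand-in for a constant key expression, e.g. COALESCE(-92, -994).
        ExprNode constantKey = new ExprNode(Collections.<ExprNode>emptyList());
        ExprNode columnKey = new ColumnNode("smallint_col_19");
        System.out.println(firstColumnOrNull(constantKey)); // null
        System.out.println(firstColumnOrNull(columnKey));   // smallint_col_19
    }
}

In the actual patch the caller reacts to the "no column found" case by returning false, i.e. it skips the semijoin reduction for that key instead of throwing.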
http://git-wip-us.apache.org/repos/asf/hive/blob/0debf9f2/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
new file mode 100644
index 0000000..2306395
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_2.q
@@ -0,0 +1,41 @@
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+
+CREATE TABLE `table_1`(
+ `bigint_col_7` bigint,
+ `decimal2016_col_26` decimal(20,16),
+ `tinyint_col_3` tinyint,
+ `decimal2612_col_77` decimal(26,12),
+ `timestamp_col_9` timestamp);
+
+CREATE TABLE `table_18`(
+ `tinyint_col_15` tinyint,
+ `decimal2709_col_9` decimal(27,9),
+ `tinyint_col_20` tinyint,
+ `smallint_col_19` smallint,
+ `decimal1911_col_16` decimal(19,11),
+ `timestamp_col_18` timestamp);
+
+-- HIVE-15904
+EXPLAIN
+SELECT
+COUNT(*)
+FROM table_1 t1
+INNER JOIN table_18 t2 ON (((t2.tinyint_col_15) = (t1.bigint_col_7)) AND
+((t2.decimal2709_col_9) = (t1.decimal2016_col_26))) AND
+((t2.tinyint_col_20) = (t1.tinyint_col_3))
+WHERE (t2.smallint_col_19) IN (SELECT
+COALESCE(-92, -994) AS int_col
+FROM table_1 tt1
+INNER JOIN table_18 tt2 ON (tt2.decimal1911_col_16) = (tt1.decimal2612_col_77)
+WHERE (t1.timestamp_col_9) = (tt2.timestamp_col_18));
+
+drop table table_1;
+drop table table_18;
http://git-wip-us.apache.org/repos/asf/hive/blob/0debf9f2/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
new file mode 100644
index 0000000..d3e0e39
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -0,0 +1,301 @@
+PREHOOK: query: CREATE TABLE `table_1`(
+ `bigint_col_7` bigint,
+ `decimal2016_col_26` decimal(20,16),
+ `tinyint_col_3` tinyint,
+ `decimal2612_col_77` decimal(26,12),
+ `timestamp_col_9` timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_1
+POSTHOOK: query: CREATE TABLE `table_1`(
+ `bigint_col_7` bigint,
+ `decimal2016_col_26` decimal(20,16),
+ `tinyint_col_3` tinyint,
+ `decimal2612_col_77` decimal(26,12),
+ `timestamp_col_9` timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_1
+PREHOOK: query: CREATE TABLE `table_18`(
+ `tinyint_col_15` tinyint,
+ `decimal2709_col_9` decimal(27,9),
+ `tinyint_col_20` tinyint,
+ `smallint_col_19` smallint,
+ `decimal1911_col_16` decimal(19,11),
+ `timestamp_col_18` timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_18
+POSTHOOK: query: CREATE TABLE `table_18`(
+ `tinyint_col_15` tinyint,
+ `decimal2709_col_9` decimal(27,9),
+ `tinyint_col_20` tinyint,
+ `smallint_col_19` smallint,
+ `decimal1911_col_16` decimal(19,11),
+ `timestamp_col_18` timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_18
+PREHOOK: query: EXPLAIN
+SELECT
+COUNT(*)
+FROM table_1 t1
+INNER JOIN table_18 t2 ON (((t2.tinyint_col_15) = (t1.bigint_col_7)) AND
+((t2.decimal2709_col_9) = (t1.decimal2016_col_26))) AND
+((t2.tinyint_col_20) = (t1.tinyint_col_3))
+WHERE (t2.smallint_col_19) IN (SELECT
+COALESCE(-92, -994) AS int_col
+FROM table_1 tt1
+INNER JOIN table_18 tt2 ON (tt2.decimal1911_col_16) = (tt1.decimal2612_col_77)
+WHERE (t1.timestamp_col_9) = (tt2.timestamp_col_18))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+COUNT(*)
+FROM table_1 t1
+INNER JOIN table_18 t2 ON (((t2.tinyint_col_15) = (t1.bigint_col_7)) AND
+((t2.decimal2709_col_9) = (t1.decimal2016_col_26))) AND
+((t2.tinyint_col_20) = (t1.tinyint_col_3))
+WHERE (t2.smallint_col_19) IN (SELECT
+COALESCE(-92, -994) AS int_col
+FROM table_1 tt1
+INNER JOIN table_18 tt2 ON (tt2.decimal1911_col_16) = (tt1.decimal2612_col_77)
+WHERE (t1.timestamp_col_9) = (tt2.timestamp_col_18))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 10 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: bigint_col_7 (type: bigint), decimal2016_col_26 (type: decimal(20,16)), tinyint_col_3 (type: tinyint), timestamp_col_9 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(34,16)), _col2 (type: tinyint), _col0 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: decimal(34,16)), _col2 (type: tinyint), _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col3 (type: timestamp)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: tt2
+ filterExpr: ((timestamp_col_18 = timestamp_col_18) and decimal1911_col_16 is not null and timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: ((timestamp_col_18 = timestamp_col_18) and decimal1911_col_16 is not null and timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_18_min) AND DynamicValue(RS_23_t1_timestamp_col_18_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_18_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(26,12))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(26,12))
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: timestamp)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: tinyint_col_15 (type: tinyint), decimal2709_col_9 (type: decimal(27,9)), tinyint_col_20 (type: tinyint), smallint_col_19 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: decimal(34,16)), _col2 (type: tinyint), UDFToLong(_col0) (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col1 (type: decimal(34,16)), _col2 (type: tinyint), UDFToLong(_col0) (type: bigint)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col3 (type: smallint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: tt1
+ filterExpr: decimal2612_col_77 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: decimal2612_col_77 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: decimal2612_col_77 (type: decimal(26,12))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(26,12))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(26,12))
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: decimal(34,16)), _col2 (type: tinyint), _col0 (type: bigint)
+ 1 _col1 (type: decimal(34,16)), _col2 (type: tinyint), UDFToLong(_col0) (type: bigint)
+ outputColumnNames: _col3, _col7
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: timestamp), UDFToInteger(_col7) (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col3 (type: timestamp), UDFToInteger(_col7) (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: timestamp), UDFToInteger(_col7) (type: int)
+ 1 _col1 (type: timestamp), -92 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: decimal(26,12))
+ 1 _col0 (type: decimal(26,12))
+ outputColumnNames: _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: timestamp)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: timestamp)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reducer 9
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: timestamp)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: timestamp)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: timestamp), -92 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: timestamp), -92 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table table_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table_1
+PREHOOK: Output: default@table_1
+POSTHOOK: query: drop table table_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table_1
+POSTHOOK: Output: default@table_1
+PREHOOK: query: drop table table_18
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table_18
+PREHOOK: Output: default@table_18
+POSTHOOK: query: drop table table_18
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table_18
+POSTHOOK: Output: default@table_18
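The plan above exercises Hive's dynamic semijoin reduction: the small join side is aggregated into min(), max(), and bloom_filter() values, which are broadcast so the big side can be pre-filtered before the shuffle. Below is a minimal, self-contained sketch of that idea, assuming a toy bloom filter; Hive's real bloom filter and the DynamicValue plumbing are more involved.

import java.util.Arrays;
import java.util.BitSet;

// Conceptual sketch, not Hive's implementation: build min/max/bloom_filter
// from the small side, then pre-filter the big side with
// "key BETWEEN min AND max AND in_bloom_filter(key, bf)" before joining.
public class SemiJoinReductionSketch {

  // Toy bloom filter over a 2^16-bit set with two hash probes.
  static final class ToyBloomFilter {
    private final BitSet bits = new BitSet(1 << 16);

    void add(long key) {
      bits.set(hash(key, 0x9E3779B9));
      bits.set(hash(key, 0x85EBCA6B));
    }

    boolean mightContain(long key) {
      return bits.get(hash(key, 0x9E3779B9)) && bits.get(hash(key, 0x85EBCA6B));
    }

    private int hash(long key, int seed) {
      long h = key * seed;
      h ^= h >>> 33;
      return (int) (h & 0xFFFF); // index into the 2^16-bit set
    }
  }

  public static void main(String[] args) {
    long[] smallSide = {3L, 7L, 42L};
    long[] bigSide = {1L, 3L, 5L, 7L, 9L, 42L, 1000L};

    // Small side: what the min/max/bloom_filter Group By Operator computes.
    long min = Arrays.stream(smallSide).min().getAsLong();
    long max = Arrays.stream(smallSide).max().getAsLong();
    ToyBloomFilter bf = new ToyBloomFilter();
    for (long k : smallSide) {
      bf.add(k);
    }

    // Big side: apply the pushed-down runtime filter before the join.
    for (long k : bigSide) {
      boolean survives = k >= min && k <= max && bf.mightContain(k);
      System.out.println(k + " -> " + (survives ? "kept for join" : "filtered out"));
    }
  }
}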
[39/50] [abbrv] hive git commit: HIVE-15955: make explain formatted to include opId and etc (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/vector_outer_join6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/vector_outer_join6.q.out
index a910ed2..bf5d503 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join6.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join6.q.out
@@ -130,7 +130,7 @@ POSTHOOK: query: explain vectorization detail formatted
select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"}}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows:
2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"}}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Out
er Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"
Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basi
c stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["b
igint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}
}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projected
OutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1",
"_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col3"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"Select Operator":{"expressio
ns:":"_col0 (type: int), _col1 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_27","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_28"}}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","
inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_29"}}}}}}
PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1
PREHOOK: type: QUERY
@@ -157,7 +157,7 @@ POSTHOOK: query: explain vectorization detail formatted
select tj1rnum, tj2rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"}}}}}}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows:
2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"c1 (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"}}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{
"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condi
tion map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.
io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-7":{"ROOT STAGE":"TRUE"},"Stage-5":{"DEPENDENT STAGES":"Stage-7"},"Stage-0":{"DEPENDENT STAGES":"Stage-5"}},"STAGE PLANS":{"Stage-7":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_0:$hdt$_1:tjoin2":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_1:tjoin3":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_0:$hdt$_1:tjoin2":{"TableScan":{"alias:":"tjoin2","Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"OperatorId:":"HASHTABLESINK_21"}}}}
}},"$hdt$_1:tjoin3":{"TableScan":{"alias:":"tjoin3","Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_8","children":{"Select Operator":{"expressions:":"c1 (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_9","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_19"}}}}}}}}},"Stage-5":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"tjoin1","Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"rnum (type: int), c1 (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1]"},
"Statistics:":"Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_23","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col1 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2","_col3"],"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_24","children":{"Select Operator":{"expressions:":"_col0 (type: int), _col2 (type: int), _col3 (type: int)","outputColumnNames:":["_col0","_col1","_col2"],"Select Vectoriza
tion:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2]"},"Statistics:":"Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_25","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_26","children":{"File Output Operator":{"compressed:":"false","File Sink Vector
ization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_27"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"3","includeColumns:":"[0, 1]","dataColumns:":["rnum:int","c1:int","c2:int"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{
"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_28"}}}}}}
PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1
PREHOOK: type: QUERY
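A practical consequence of HIVE-15955 is visible in the diff above: every operator in the formatted JSON plan now carries an "OperatorId:" key (TS_2, SEL_3, MAPJOIN_24, LIST_SINK_29, ...), so tooling can refer to individual operators across plans. Below is a hedged sketch of consuming that output with Jackson; the key name, trailing colon included, is copied verbatim from the plan output, while the class and method names here are illustrative, not part of Hive.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

// Walk the "explain ... formatted" JSON and collect every operator id.
public class ExplainOperatorIds {

  static void collect(JsonNode node, List<String> out) {
    if (node.isObject()) {
      Iterator<Map.Entry<String, JsonNode>> fields = node.fields();
      while (fields.hasNext()) {
        Map.Entry<String, JsonNode> f = fields.next();
        if ("OperatorId:".equals(f.getKey())) {   // note the trailing colon
          out.add(f.getValue().asText());
        }
        collect(f.getValue(), out);
      }
    } else if (node.isArray()) {
      for (JsonNode child : node) {
        collect(child, out);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    String planJson = args[0]; // e.g. the one-line output of EXPLAIN FORMATTED
    JsonNode root = new ObjectMapper().readTree(planJson);
    List<String> ids = new ArrayList<>();
    collect(root, ids);
    System.out.println(ids); // e.g. [TS_2, SEL_3, HASHTABLESINK_21, ...]
  }
}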
[30/50] [abbrv] hive git commit: HIVE-15971: LLAP: logs urls should use daemon container id instead of fake container id
Posted by se...@apache.org.
HIVE-15971: LLAP: logs urls should use daemon container id instead of fake container id
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d5bb76cf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d5bb76cf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d5bb76cf
Branch: refs/heads/hive-14535
Commit: d5bb76cf2da3934d1de6b3087ac4bfafa2b2cb6f
Parents: de532b1
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Tue Feb 21 14:25:47 2017 -0800
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Tue Feb 21 14:25:47 2017 -0800
----------------------------------------------------------------------
.../llap/registry/impl/LlapRegistryService.java | 13 +--
.../llap/tezplugins/LlapTaskCommunicator.java | 91 +++++++++++---------
2 files changed, 57 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d5bb76cf/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java
----------------------------------------------------------------------
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java
index 5a94db9..610c0a5 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/registry/impl/LlapRegistryService.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.registry.ServiceInstanceSet;
import org.apache.hadoop.hive.llap.registry.ServiceInstanceStateChangeListener;
import org.apache.hadoop.hive.llap.registry.ServiceRegistry;
+import org.apache.hadoop.registry.client.binding.RegistryUtils;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.slf4j.Logger;
@@ -57,17 +58,17 @@ public class LlapRegistryService extends AbstractService {
String hosts = HiveConf.getTrimmedVar(conf, HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
Preconditions.checkNotNull(hosts, ConfVars.LLAP_DAEMON_SERVICE_HOSTS.toString() + " must be defined");
LlapRegistryService registry;
- // TODO: this is not going to work with multiple users.
if (hosts.startsWith("@")) {
// Caching instances only in case of the YARN registry. Each host-based list will get its own copy.
- String name = hosts.substring(1);
- if (yarnRegistries.containsKey(name) && yarnRegistries.get(name).isInState(STATE.STARTED)) {
- registry = yarnRegistries.get(name);
- } else {
+ String appName = hosts.substring(1);
+ String userName = HiveConf.getVar(conf, ConfVars.LLAP_ZK_REGISTRY_USER, RegistryUtils.currentUser());
+ String key = appName + "-" + userName;
+ registry = yarnRegistries.get(key);
+ if (registry == null || !registry.isInState(STATE.STARTED)) {
registry = new LlapRegistryService(false);
registry.init(conf);
registry.start();
- yarnRegistries.put(name, registry);
+ yarnRegistries.put(key, registry);
}
} else {
registry = new LlapRegistryService(false);
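The fix above changes the cache key for YARN-registry-backed instances from the bare application name to appName + "-" + userName, so two users running identically named LLAP clusters no longer share (or clobber) a single registry client; it also replaces the containsKey/get pair with a single lookup. A stripped-down sketch of the resulting get-or-create pattern follows; Registry and lookupOrCreate are hypothetical stand-ins, not the real LlapRegistryService API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Sketch of the per-user registry cache introduced above; before HIVE-15971
// the key was the application name alone, which collided across users.
final class RegistryCacheSketch {
  interface Registry { boolean isStarted(); }

  private final Map<String, Registry> cache = new ConcurrentHashMap<>();

  Registry lookupOrCreate(String appName, String userName) {
    String key = appName + "-" + userName;   // new composite cache key
    Registry r = cache.get(key);
    if (r == null || !r.isStarted()) {       // recreate if missing or stopped
      r = () -> true;                        // stand-in for init() + start()
      cache.put(key, r);
    }
    return r;
  }
}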
http://git-wip-us.apache.org/repos/asf/hive/blob/d5bb76cf/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
----------------------------------------------------------------------
diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
index 3aae7a4..e593b33 100644
--- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
+++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java
@@ -14,12 +14,12 @@
package org.apache.hadoop.hive.llap.tezplugins;
+import org.apache.hadoop.hive.llap.registry.ServiceInstance;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.hive.llap.protocol.LlapTaskUmbilicalProtocol.TezAttemptArray;
import java.io.IOException;
-import java.net.URI;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Map;
@@ -75,7 +75,7 @@ import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
import org.apache.tez.common.TezTaskUmbilicalProtocol;
import org.apache.tez.common.TezUtils;
import org.apache.tez.common.security.JobTokenSecretManager;
@@ -119,7 +119,8 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl {
private final Token<LlapTokenIdentifier> token;
private final String user;
private String amHost;
- private URI timelineServerUri;
+ private String timelineServerUri;
+ private int nmPort;
// These two structures track the list of known nodes, and the list of nodes which are sending in keep-alive heartbeats.
// Primarily for debugging purposes at the moment, since there are some unexplained TASK_TIMEOUTS currently being observed.
@@ -149,7 +150,6 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl {
Preconditions.checkState((token != null) == UserGroupInformation.isSecurityEnabled());
// Not closing this at the moment at shutdown, since this could be a shared instance.
- // TODO: this is unused.
serviceRegistry = LlapRegistryService.getClient(conf);
umbilical = new LlapTaskUmbilicalProtocolImpl(getUmbilical());
@@ -191,18 +191,10 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl {
+ "fileCleanupDelay=" + deleteDelayOnDagComplete
+ ", numCommunicatorThreads=" + numThreads);
this.communicator.init(conf);
- if (YarnConfiguration.useHttps(conf)) {
- timelineServerUri = URI
- .create(JOINER.join("https://", conf.get(
- YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS,
- YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS),
- RESOURCE_URI_STR));
- } else {
- timelineServerUri = URI.create(JOINER.join("http://", conf.get(
- YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS,
- YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_ADDRESS),
- RESOURCE_URI_STR));
- }
+ String scheme = WebAppUtils.getHttpSchemePrefix(conf);
+ String ahsUrl = WebAppUtils.getAHSWebAppURLWithoutScheme(conf);
+ this.timelineServerUri = WebAppUtils.getURLWithScheme(scheme, ahsUrl);
+ this.nmPort = Integer.valueOf(WebAppUtils.getNMWebAppURLWithoutScheme(conf).split(":")[1]);
}
@Override
@@ -540,37 +532,54 @@ public class LlapTaskCommunicator extends TezTaskCommunicatorImpl {
@Override
public String getInProgressLogsUrl(TezTaskAttemptID attemptID, NodeId containerNodeId) {
- String url = "";
- if (timelineServerUri != null && containerNodeId != null) {
- LlapNodeId llapNodeId = LlapNodeId.getInstance(containerNodeId.getHost(), containerNodeId.getPort());
- BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(llapNodeId);
- ContainerId containerId = biMap.inverse().get(attemptID);
- if (containerId != null) {
- String dagId = attemptID.getTaskID().getVertexID().getDAGId().toString();
- String filename = currentHiveQueryId + "-" + dagId + ".log";
- // YARN-6011 provides a webservice to get the logs
- url = PATH_JOINER.join(timelineServerUri.toString(), "containers", containerId.toString(), "logs",
- filename);
- }
- }
- return url;
+ return constructLogUrl(attemptID, containerNodeId, false);
}
@Override
public String getCompletedLogsUrl(TezTaskAttemptID attemptID, NodeId containerNodeId) {
- String url = "";
- if (timelineServerUri != null && containerNodeId != null) {
- LlapNodeId llapNodeId = LlapNodeId.getInstance(containerNodeId.getHost(), containerNodeId.getPort());
- BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(llapNodeId);
- ContainerId containerId = biMap.inverse().get(attemptID);
- if (containerId != null) {
- String dagId = attemptID.getTaskID().getVertexID().getDAGId().toString();
- String filename = currentHiveQueryId + "-" + dagId + ".log.done";
- // YARN-6011 provides a webservice to get the logs
- url = PATH_JOINER.join(timelineServerUri.toString(), "containers", containerId.toString(), "logs",
- filename);
+ return constructLogUrl(attemptID, containerNodeId, true);
+ }
+
+ private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId containerNodeId, final boolean isDone) {
+ if (timelineServerUri == null || containerNodeId == null) {
+ return null;
+ }
+ Set<ServiceInstance> instanceSet;
+ try {
+ instanceSet = serviceRegistry.getInstances().getByHost(containerNodeId.getHost());
+ } catch (IOException e) {
+ // Not failing the job due to a failure constructing the log url
+ LOG.warn(
+ "Unable to find instance for yarnNodeId={} to construct the log url. Exception message={}",
+ containerNodeId, e.getMessage());
+ return null;
+ }
+ if (instanceSet != null) {
+ ServiceInstance matchedInstance = null;
+ for (ServiceInstance instance : instanceSet) {
+ if (instance.getRpcPort() == containerNodeId.getPort()) {
+ matchedInstance = instance;
+ break;
+ }
+ }
+ if (matchedInstance != null) {
+ String containerIdString = matchedInstance.getProperties()
+ .get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
+ if (containerIdString != null) {
+ return constructLlapLogUrl(attemptID, containerIdString, isDone, containerNodeId.getHost());
+ }
}
}
+ return null;
+ }
+
+ private String constructLlapLogUrl(final TezTaskAttemptID attemptID, final String containerIdString,
+ final boolean isDone, final String nmHost) {
+ String dagId = attemptID.getTaskID().getVertexID().getDAGId().toString();
+ String filename = JOINER.join(currentHiveQueryId, "-", dagId, ".log", (isDone ? ".done" : ""),
+ "?nm.id=", nmHost, ":", nmPort);
+ String url = PATH_JOINER.join(timelineServerUri, "ws", "v1", "applicationhistory", "containers",
+ containerIdString, "logs", filename);
return url;
}
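Put together, constructLogUrl resolves the LLAP daemon's real container id from the service registry (matching the node's RPC port against registered instances) and constructLlapLogUrl then assembles an Application Timeline Server URL of the shape sketched below. The host names, ids, and query id in this minimal sketch are made-up example values; the path segments and the nm.id query parameter come from the code above, which uses Guava Joiner instances rather than String.join.

// Sketch of the URL shape produced by constructLlapLogUrl, with example values.
public class LlapLogUrlSketch {
  public static void main(String[] args) {
    String timelineServerUri = "http://ats-host:8188";               // from WebAppUtils
    String containerId = "container_1487712345678_0001_01_000002";  // LLAP daemon's real container
    String queryId = "hive_20170221142547_example";                  // hypothetical query id
    String dagId = "dag_1487712345678_0001_1";                       // hypothetical DAG id
    String nmHost = "worker-1";
    int nmPort = 8042;
    boolean isDone = false;                                          // ".done" suffix when completed

    String filename = queryId + "-" + dagId + ".log" + (isDone ? ".done" : "")
        + "?nm.id=" + nmHost + ":" + nmPort;
    String url = String.join("/", timelineServerUri, "ws", "v1",
        "applicationhistory", "containers", containerId, "logs", filename);
    System.out.println(url);
    // http://ats-host:8188/ws/v1/applicationhistory/containers/container_.../logs/hive_..-dag_...log?nm.id=worker-1:8042
  }
}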
[11/50] [abbrv] hive git commit: HIVE-15954 : LLAP: some Tez INFO logs are too noisy (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Posted by se...@apache.org.
HIVE-15954 : LLAP: some Tez INFO logs are too noisy (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bba18181
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bba18181
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bba18181
Branch: refs/heads/hive-14535
Commit: bba18181ad107fbea17c2d2bea9fd88873a4a943
Parents: 6f6a558
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Feb 17 11:49:38 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Feb 17 11:50:14 2017 -0800
----------------------------------------------------------------------
.../src/main/resources/llap-daemon-log4j2.properties | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bba18181/llap-server/src/main/resources/llap-daemon-log4j2.properties
----------------------------------------------------------------------
diff --git a/llap-server/src/main/resources/llap-daemon-log4j2.properties b/llap-server/src/main/resources/llap-daemon-log4j2.properties
index a9bfa34..31a34be 100644
--- a/llap-server/src/main/resources/llap-daemon-log4j2.properties
+++ b/llap-server/src/main/resources/llap-daemon-log4j2.properties
@@ -100,7 +100,19 @@ appender.query-routing.routes.route-mdc.file-mdc.app.layout.type = PatternLayout
appender.query-routing.routes.route-mdc.file-mdc.app.layout.pattern = %d{ISO8601} %5p [%t (%X{fragmentId})] %c{2}: %m%n
# list of all loggers
-loggers = PerfLogger, EncodedReader, NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, HistoryLogger, LlapIoImpl, LlapIoOrc, LlapIoCache, LlapIoLocking
+loggers = PerfLogger, EncodedReader, NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, HistoryLogger, LlapIoImpl, LlapIoOrc, LlapIoCache, LlapIoLocking, TezSM, TezSS, TezM, TezHC
+
+
+# shut up the Tez logs that log debug-level stuff on INFO
+
+logger.TezSM.name = org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager
+logger.TezSM.level = WARN
+logger.TezSS.name = org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleScheduler
+logger.TezSS.level = WARN
+logger.TezM.name = org.apache.tez.runtime.library.common.sort.impl.TezMerger
+logger.TezM.level = WARN
+logger.TezHC.name = org.apache.tez.http.HttpConnection
+logger.TezHC.level = WARN
logger.PerfLogger.name = org.apache.hadoop.hive.ql.log.PerfLogger
logger.PerfLogger.level = DEBUG
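The same silencing can also be expressed through the log4j2 core API, which may be handy when experimenting before editing the properties file. A hedged sketch follows, assuming log4j-core is on the classpath; the llap-daemon-log4j2.properties route above is what the patch actually ships.

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.config.Configurator;

// Raise the same four noisy Tez loggers from INFO to WARN programmatically.
public class QuietTezLoggers {
  public static void main(String[] args) {
    Configurator.setLevel(
        "org.apache.tez.runtime.library.common.shuffle.impl.ShuffleManager", Level.WARN);
    Configurator.setLevel(
        "org.apache.tez.runtime.library.common.shuffle.orderedgrouped.ShuffleScheduler", Level.WARN);
    Configurator.setLevel(
        "org.apache.tez.runtime.library.common.sort.impl.TezMerger", Level.WARN);
    Configurator.setLevel("org.apache.tez.http.HttpConnection", Level.WARN);
  }
}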
[04/50] [abbrv] hive git commit: HIVE-15902: Select query involving date throwing Hive 2 Internal error: unsupported conversion from type: date (Jason Dere, reviewed by Matt McCline)
Posted by se...@apache.org.
HIVE-15902: Select query involving date throwing Hive 2 Internal error: unsupported conversion from type: date (Jason Dere, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f3790ce2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f3790ce2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f3790ce2
Branch: refs/heads/hive-14535
Commit: f3790ce2604621ac2512053b50eaca1ad053f178
Parents: e49a074
Author: Jason Dere <jd...@hortonworks.com>
Authored: Thu Feb 16 13:52:29 2017 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Thu Feb 16 13:52:29 2017 -0800
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../FilterColumnBetweenDynamicValue.txt | 9 +-
.../ql/exec/vector/VectorizationContext.java | 2 +-
.../vectorized_dynamic_semijoin_reduction2.q | 50 ++
...vectorized_dynamic_semijoin_reduction2.q.out | 772 +++++++++++++++++++
.../apache/hadoop/hive/tools/GenVectorCode.java | 18 +
6 files changed, 847 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f3790ce2/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2c53047..e8db920 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -610,6 +610,7 @@ minillaplocal.query.files=acid_globallimit.q,\
vectorization_short_regress.q,\
vectorized_dynamic_partition_pruning.q,\
vectorized_dynamic_semijoin_reduction.q,\
+ vectorized_dynamic_semijoin_reduction2.q,\
vectorized_ptf.q,\
windowing.q,\
windowing_gby.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/f3790ce2/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
index 97ab7aa..1aee9b3 100644
--- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
+++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.Filter<TypeName>ColumnBetween;
import org.apache.hadoop.hive.ql.plan.DynamicValue;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -79,12 +80,12 @@ public class <ClassName> extends Filter<TypeName>ColumnBetween {
if (lVal == null || rVal == null) {
isLeftOrRightNull = true;
} else {
- <VectorType> min = PrimitiveObjectInspectorUtils.<GetPrimitiveMethod>(
- lVal, leftDynamicValue.getObjectInspector())<GetValueMethod>;
+ <VectorType> min = <ConversionMethod>(PrimitiveObjectInspectorUtils.<GetPrimitiveMethod>(
+ lVal, leftDynamicValue.getObjectInspector())<GetValueMethod>);
setLeftValue(min);
- <VectorType> max = PrimitiveObjectInspectorUtils.<GetPrimitiveMethod>(
- rVal, rightDynamicValue.getObjectInspector())<GetValueMethod>;
+ <VectorType> max = <ConversionMethod>(PrimitiveObjectInspectorUtils.<GetPrimitiveMethod>(
+ rVal, rightDynamicValue.getObjectInspector())<GetValueMethod>);
setRightValue(max);
}
initialized = true;
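For date columns, the new <ConversionMethod> hook is what makes this template usable: the object pulled out of the DynamicValue is a java.sql.Date, which must be converted to the long "days since epoch" representation that the long-based vectorized filter compares against, and which VectorizationContext now selects via FilterDateColumnBetweenDynamicValue instead of the plain long variant. A hedged sketch of what the expanded template body plausibly becomes for the date case; the exact substitution is defined in GenVectorCode.java, and DateWritable.dateToDays is an assumption suggested by the new import above.

// Assumed expansion for FilterDateColumnBetweenDynamicValue (template fragment):
long min = DateWritable.dateToDays(PrimitiveObjectInspectorUtils.getDate(
    lVal, leftDynamicValue.getObjectInspector()));
setLeftValue(min);
long max = DateWritable.dateToDays(PrimitiveObjectInspectorUtils.getDate(
    rVal, rightDynamicValue.getObjectInspector()));
setRightValue(max);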
http://git-wip-us.apache.org/repos/asf/hive/blob/f3790ce2/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 4802489..8164684 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2240,7 +2240,7 @@ public class VectorizationContext {
cl = FilterDecimalColumnNotBetween.class;
} else if (isDateFamily(colType) && !notKeywordPresent) {
cl = (hasDynamicValues ?
- FilterLongColumnBetweenDynamicValue.class :
+ FilterDateColumnBetweenDynamicValue.class :
FilterLongColumnBetween.class);
} else if (isDateFamily(colType) && notKeywordPresent) {
cl = FilterLongColumnNotBetween.class;
http://git-wip-us.apache.org/repos/asf/hive/blob/f3790ce2/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
new file mode 100644
index 0000000..446407d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
@@ -0,0 +1,50 @@
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+
+set hive.vectorized.adaptor.usage.mode=none;
+set hive.vectorized.execution.enabled=true;
+
+-- Create Tables
+create table dsrv2_big stored as orc as
+ select
+ cast(L_PARTKEY as bigint) as partkey_bigint,
+ cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
+ cast(L_PARTKEY as double) as partkey_double,
+ cast(l_shipdate as date) as shipdate_date,
+ cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+ from lineitem;
+create table dsrv2_small stored as orc as select * from dsrv2_big limit 20;
+analyze table dsrv2_big compute statistics;
+analyze table dsrv2_small compute statistics;
+analyze table dsrv2_big compute statistics for columns;
+analyze table dsrv2_small compute statistics for columns;
+
+-- single key (bigint)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint);
+
+-- single key (decimal)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal);
+
+-- single key (double)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_double = b.partkey_double);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_double = b.partkey_double);
+
+-- single key (date)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.shipdate_date);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.shipdate_date);
+
+-- single key (timestamp)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
+
+drop table dsrv2_big;
+drop table dsrv2_small;
http://git-wip-us.apache.org/repos/asf/hive/blob/f3790ce2/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
new file mode 100644
index 0000000..27d8152
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -0,0 +1,772 @@
+PREHOOK: query: create table dsrv2_big stored as orc as
+ select
+ cast(L_PARTKEY as bigint) as partkey_bigint,
+ cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
+ cast(L_PARTKEY as double) as partkey_double,
+ cast(l_shipdate as date) as shipdate_date,
+ cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+ from lineitem
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@lineitem
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dsrv2_big
+POSTHOOK: query: create table dsrv2_big stored as orc as
+ select
+ cast(L_PARTKEY as bigint) as partkey_bigint,
+ cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
+ cast(L_PARTKEY as double) as partkey_double,
+ cast(l_shipdate as date) as shipdate_date,
+ cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+ from lineitem
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dsrv2_big
+POSTHOOK: Lineage: dsrv2_big.partkey_bigint EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.partkey_decimal EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.partkey_double EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_date EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_ts EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+PREHOOK: query: create table dsrv2_small stored as orc as select * from dsrv2_big limit 20
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dsrv2_small
+POSTHOOK: query: create table dsrv2_small stored as orc as select * from dsrv2_big limit 20
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dsrv2_small
+POSTHOOK: Lineage: dsrv2_small.partkey_bigint SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_bigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.partkey_decimal SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_decimal, type:decimal(10,1), comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.partkey_double SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_double, type:double, comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_date SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_date, type:date, comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_ts SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_ts, type:timestamp, comment:null), ]
+PREHOOK: query: analyze table dsrv2_big compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Output: default@dsrv2_big
+POSTHOOK: query: analyze table dsrv2_big compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Output: default@dsrv2_big
+PREHOOK: query: analyze table dsrv2_small compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_small
+PREHOOK: Output: default@dsrv2_small
+POSTHOOK: query: analyze table dsrv2_small compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_small
+POSTHOOK: Output: default@dsrv2_small
+PREHOOK: query: analyze table dsrv2_big compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table dsrv2_big compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+#### A masked pattern was here ####
+PREHOOK: query: analyze table dsrv2_small compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table dsrv2_small compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (partkey_bigint is not null and partkey_bigint BETWEEN DynamicValue(RS_7_b_partkey_bigint_min) AND DynamicValue(RS_7_b_partkey_bigint_max) and in_bloom_filter(partkey_bigint, DynamicValue(RS_7_b_partkey_bigint_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (partkey_bigint is not null and partkey_bigint BETWEEN DynamicValue(RS_7_b_partkey_bigint_min) AND DynamicValue(RS_7_b_partkey_bigint_max) and in_bloom_filter(partkey_bigint, DynamicValue(RS_7_b_partkey_bigint_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: partkey_bigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: partkey_bigint is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: partkey_bigint is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: partkey_bigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=18)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=18)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_bigint = b.partkey_bigint)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+20
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (partkey_decimal is not null and partkey_decimal BETWEEN DynamicValue(RS_7_b_partkey_decimal_min) AND DynamicValue(RS_7_b_partkey_decimal_max) and in_bloom_filter(partkey_decimal, DynamicValue(RS_7_b_partkey_decimal_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (partkey_decimal is not null and partkey_decimal BETWEEN DynamicValue(RS_7_b_partkey_decimal_min) AND DynamicValue(RS_7_b_partkey_decimal_max) and in_bloom_filter(partkey_decimal, DynamicValue(RS_7_b_partkey_decimal_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: partkey_decimal (type: decimal(10,1))
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(10,1))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(10,1))
+ Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: partkey_decimal is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: partkey_decimal is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: partkey_decimal (type: decimal(10,1))
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: decimal(10,1))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: decimal(10,1))
+ Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: decimal(10,1))
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,1)), _col1 (type: decimal(10,1)), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: decimal(10,1))
+ 1 _col0 (type: decimal(10,1))
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=16)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(10,1)), _col1 (type: decimal(10,1)), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_decimal = b.partkey_decimal)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+20
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_double = b.partkey_double)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_double = b.partkey_double)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (partkey_double is not null and partkey_double BETWEEN DynamicValue(RS_7_b_partkey_double_min) AND DynamicValue(RS_7_b_partkey_double_max) and in_bloom_filter(partkey_double, DynamicValue(RS_7_b_partkey_double_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (partkey_double is not null and partkey_double BETWEEN DynamicValue(RS_7_b_partkey_double_min) AND DynamicValue(RS_7_b_partkey_double_max) and in_bloom_filter(partkey_double, DynamicValue(RS_7_b_partkey_double_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: partkey_double (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: partkey_double is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: partkey_double is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: partkey_double (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: double)
+ 1 _col0 (type: double)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_double = b.partkey_double)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.partkey_double = b.partkey_double)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+20
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.shipdate_date)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.shipdate_date)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (shipdate_date is not null and shipdate_date BETWEEN DynamicValue(RS_7_b_shipdate_date_min) AND DynamicValue(RS_7_b_shipdate_date_max) and in_bloom_filter(shipdate_date, DynamicValue(RS_7_b_shipdate_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (shipdate_date is not null and shipdate_date BETWEEN DynamicValue(RS_7_b_shipdate_date_min) AND DynamicValue(RS_7_b_shipdate_date_max) and in_bloom_filter(shipdate_date, DynamicValue(RS_7_b_shipdate_date_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_date (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: date)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: date)
+ Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: shipdate_date is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: shipdate_date is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_date (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: date)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: date)
+ Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: date)
+ 1 _col0 (type: date)
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.shipdate_date)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.shipdate_date)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (shipdate_ts is not null and shipdate_ts BETWEEN DynamicValue(RS_7_b_shipdate_ts_min) AND DynamicValue(RS_7_b_shipdate_ts_max) and in_bloom_filter(shipdate_ts, DynamicValue(RS_7_b_shipdate_ts_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (shipdate_ts is not null and shipdate_ts BETWEEN DynamicValue(RS_7_b_shipdate_ts_min) AND DynamicValue(RS_7_b_shipdate_ts_max) and in_bloom_filter(shipdate_ts, DynamicValue(RS_7_b_shipdate_ts_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_ts (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: timestamp)
+ Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: shipdate_ts is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: shipdate_ts is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_ts (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: timestamp)
+ Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: timestamp)
+ 1 _col0 (type: timestamp)
+ Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
+PREHOOK: query: drop table dsrv2_big
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Output: default@dsrv2_big
+POSTHOOK: query: drop table dsrv2_big
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Output: default@dsrv2_big
+PREHOOK: query: drop table dsrv2_small
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dsrv2_small
+PREHOOK: Output: default@dsrv2_small
+POSTHOOK: query: drop table dsrv2_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dsrv2_small
+POSTHOOK: Output: default@dsrv2_small
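
All five plans above share the same dynamic semijoin reduction shape: the small side (Map 4, feeding Reducer 5) computes min(key), max(key) and a bloom filter sized by expectedEntries, broadcasts the result to Map 1, and the big side prunes rows with "key BETWEEN min AND max and in_bloom_filter(key, ...)" before shuffling into the join. A minimal, self-contained Java sketch of that mechanism follows — a toy two-probe bloom filter and invented join keys, not Hive's real BloomKFilter:

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class SemijoinReductionSketch {
  // Toy two-probe bloom filter over a BitSet; Hive's implementation is far
  // more careful, this only illustrates the mechanism.
  static class ToyBloom {
    private final BitSet bits;
    private final long size;
    ToyBloom(int expectedEntries) {
      size = expectedEntries * 16L;
      bits = new BitSet((int) size);
    }
    private int probe(long key, long seed) {
      return (int) Math.floorMod(key * seed + 1, size);
    }
    void add(long key) {
      bits.set(probe(key, 0x9E3779B97F4A7C15L));
      bits.set(probe(key, 0xC2B2AE3D27D4EB4FL));
    }
    boolean mightContain(long key) {
      return bits.get(probe(key, 0x9E3779B97F4A7C15L))
          && bits.get(probe(key, 0xC2B2AE3D27D4EB4FL));
    }
  }

  public static void main(String[] args) {
    long[] smallKeys = {3, 7, 11};                    // invented dsrv2_small join keys
    long[] bigKeys = {1, 2, 3, 7, 9, 11, 40, 41, 42}; // invented dsrv2_big join keys

    // Small side (Map 4 / Reducer 5): min, max and bloom filter over the keys.
    long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
    ToyBloom bloom = new ToyBloom(smallKeys.length);
    for (long k : smallKeys) {
      min = Math.min(min, k);
      max = Math.max(max, k);
      bloom.add(k);
    }

    // Big side (Map 1): prune with the broadcast values before the join,
    // mirroring "key BETWEEN min AND max and in_bloom_filter(key, bf)".
    List<Long> survivors = new ArrayList<>();
    for (long k : bigKeys) {
      if (k >= min && k <= max && bloom.mightContain(k)) {
        survivors.add(k);
      }
    }
    System.out.println(survivors); // [3, 7, 11], modulo bloom false positives
  }
}
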
http://git-wip-us.apache.org/repos/asf/hive/blob/f3790ce2/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
----------------------------------------------------------------------
diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 22b8752..55cfb7b 100644
--- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -796,6 +796,7 @@ public class GenVectorCode extends Task {
{"FilterColumnBetweenDynamicValue", "string", ""},
{"FilterColumnBetweenDynamicValue", "char", ""},
{"FilterColumnBetweenDynamicValue", "varchar", ""},
+ {"FilterColumnBetweenDynamicValue", "date", ""},
{"FilterColumnBetweenDynamicValue", "timestamp", ""},
{"ColumnCompareColumn", "Equal", "long", "double", "=="},
@@ -1402,42 +1403,58 @@ public class GenVectorCode extends Task {
String vectorType;
String getPrimitiveMethod;
String getValueMethod;
+ String conversionMethod;
if (operandType.equals("long")) {
defaultValue = "0";
vectorType = "long";
getPrimitiveMethod = "getLong";
getValueMethod = "";
+ conversionMethod = "";
} else if (operandType.equals("double")) {
defaultValue = "0";
vectorType = "double";
getPrimitiveMethod = "getDouble";
getValueMethod = "";
+ conversionMethod = "";
} else if (operandType.equals("decimal")) {
defaultValue = "null";
vectorType = "HiveDecimal";
getPrimitiveMethod = "getHiveDecimal";
getValueMethod = "";
+ conversionMethod = "";
} else if (operandType.equals("string")) {
defaultValue = "null";
vectorType = "byte[]";
getPrimitiveMethod = "getString";
getValueMethod = ".getBytes()";
+ conversionMethod = "";
} else if (operandType.equals("char")) {
defaultValue = "null";
vectorType = "byte[]";
getPrimitiveMethod = "getHiveChar";
getValueMethod = ".getStrippedValue().getBytes()"; // Does vectorization use stripped char values?
+ conversionMethod = "";
} else if (operandType.equals("varchar")) {
defaultValue = "null";
vectorType = "byte[]";
getPrimitiveMethod = "getHiveVarchar";
getValueMethod = ".getValue().getBytes()";
+ conversionMethod = "";
+ } else if (operandType.equals("date")) {
+ defaultValue = "0";
+ vectorType = "long";
+ getPrimitiveMethod = "getDate";
+ getValueMethod = "";
+ conversionMethod = "DateWritable.dateToDays";
+ // Special case - Date requires its own specific BetweenDynamicValue class, but derives from FilterLongColumnBetween
+ typeName = "Long";
} else if (operandType.equals("timestamp")) {
defaultValue = "null";
vectorType = "Timestamp";
getPrimitiveMethod = "getTimestamp";
getValueMethod = "";
+ conversionMethod = "";
} else {
throw new IllegalArgumentException("Type " + operandType + " not supported");
}
@@ -1451,6 +1468,7 @@ public class GenVectorCode extends Task {
templateString = templateString.replaceAll("<VectorType>", vectorType);
templateString = templateString.replaceAll("<GetPrimitiveMethod>", getPrimitiveMethod);
templateString = templateString.replaceAll("<GetValueMethod>", getValueMethod);
+ templateString = templateString.replaceAll("<ConversionMethod>", conversionMethod);
writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
className, templateString);
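
The generator above is plain placeholder substitution: each operand type contributes a handful of strings, and the template's <...> tokens are replaced before the class is written out. The date branch is the special case — it forces typeName to "Long" so the generated filter derives from FilterLongColumnBetween, and supplies DateWritable.dateToDays as the conversion. A stripped-down sketch with an invented mini-template (the real templates ship with the vectorization code gen; the reader.getDate() scaffolding here is illustrative only):

public class TemplateSubstitutionSketch {
  public static void main(String[] args) {
    // Invented mini-template standing in for the real template files.
    String template =
        "public class Filter<TypeName>ColumnBetweenDynamicValue {\n"
      + "  <VectorType> value = <ConversionMethod>(reader.<GetPrimitiveMethod>()<GetValueMethod>);\n"
      + "}\n";

    // Values mirroring the new "date" branch above: dates are carried as
    // epoch days in a long vector, so the filter reuses the Long between
    // template and converts via DateWritable.dateToDays.
    String out = template
        .replaceAll("<TypeName>", "Long")
        .replaceAll("<VectorType>", "long")
        .replaceAll("<GetPrimitiveMethod>", "getDate")
        .replaceAll("<GetValueMethod>", "")
        .replaceAll("<ConversionMethod>", "DateWritable.dateToDays");

    // Prints a FilterLongColumnBetweenDynamicValue class whose value is
    // produced by DateWritable.dateToDays(reader.getDate()).
    System.out.print(out);
  }
}
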
[06/50] [abbrv] hive git commit: HIVE-15710: HS2 Stopped when running
in background (Rui reviewed by Ferdinand, Xuefu and Mohit)
Posted by se...@apache.org.
HIVE-15710: HS2 Stopped when running in background (Rui reviewed by Ferdinand, Xuefu and Mohit)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/90688bde
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/90688bde
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/90688bde
Branch: refs/heads/hive-14535
Commit: 90688bde9b1172fb2a042905dba645fbeda92510
Parents: ef61a9b
Author: Rui Li <li...@apache.org>
Authored: Fri Feb 17 11:08:40 2017 +0800
Committer: Rui Li <sh...@cn.ibm.com>
Committed: Fri Feb 17 11:08:40 2017 +0800
----------------------------------------------------------------------
bin/beeline | 5 -----
bin/ext/cli.sh | 8 --------
bin/hive | 7 +++++++
3 files changed, 7 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/90688bde/bin/beeline
----------------------------------------------------------------------
diff --git a/bin/beeline b/bin/beeline
index 7b974d4..d247c39 100644
--- a/bin/beeline
+++ b/bin/beeline
@@ -18,9 +18,4 @@
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
-# If process is backgrounded, don't change terminal settings
-if [[ ( ! $(ps -o stat= -p $$) =~ "+" ) && ! ( -p /dev/stdin ) ]]; then
- export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djline.terminal=jline.UnsupportedTerminal"
-fi
-
. "$bin"/hive --service beeline "$@"
http://git-wip-us.apache.org/repos/asf/hive/blob/90688bde/bin/ext/cli.sh
----------------------------------------------------------------------
diff --git a/bin/ext/cli.sh b/bin/ext/cli.sh
index 4933c2e..87329f3 100644
--- a/bin/ext/cli.sh
+++ b/bin/ext/cli.sh
@@ -22,13 +22,6 @@ if [ -z "$USE_DEPRECATED_CLI" ] || [ "$USE_DEPRECATED_CLI" != "false" ]; then
USE_DEPRECATED_CLI="true"
fi
-updateBeelineOpts() {
- # If process is backgrounded, don't change terminal settings
- if [[ ( ! $(ps -o stat= -p $$) =~ *+ ) && ! ( -p /dev/stdin ) ]]; then
- export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djline.terminal=jline.UnsupportedTerminal"
- fi
-}
-
updateCli() {
if [ "$USE_DEPRECATED_CLI" == "true" ]; then
CLASS=org.apache.hadoop.hive.cli.CliDriver
@@ -37,7 +30,6 @@ updateCli() {
export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dlog4j.configurationFile=beeline-log4j2.properties"
CLASS=org.apache.hive.beeline.cli.HiveCli
JAR=hive-beeline-*.jar
- updateBeelineOpts
fi
}
http://git-wip-us.apache.org/repos/asf/hive/blob/90688bde/bin/hive
----------------------------------------------------------------------
diff --git a/bin/hive b/bin/hive
index 50fbddd..e1ee206 100755
--- a/bin/hive
+++ b/bin/hive
@@ -344,6 +344,13 @@ else
export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Dlog4j.configurationFile=hive-log4j2.properties -Djava.util.logging.config.file=$bin/../conf/parquet-logging.properties "
fi
+if [[ "$SERVICE" =~ ^(hiveserver2|beeline|cli)$ ]] ; then
+ # If process is backgrounded, don't change terminal settings
+ if [[ ( ! $(ps -o stat= -p $$) =~ "+" ) && ! ( -p /dev/stdin ) ]]; then
+ export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Djline.terminal=jline.UnsupportedTerminal"
+ fi
+fi
+
if [ "$TORUN" = "" ] ; then
echo "Service $SERVICE not found"
echo "Available Services: $SERVICE_LIST"
[46/50] [abbrv] hive git commit: HIVE-15830. Allow additional ACLs
for tez jobs. (Siddharth Seth, reviewed by Daniel Dai)
Posted by se...@apache.org.
HIVE-15830. Allow additional ACLs for tez jobs. (Siddharth Seth, reviewed by Daniel Dai)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/657236ee
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/657236ee
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/657236ee
Branch: refs/heads/hive-14535
Commit: 657236ee0cce12d6e02bf0c2c406fc5dbf7b3791
Parents: 539d3c6
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Feb 23 14:25:07 2017 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Thu Feb 23 14:25:07 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../common/util/ACLConfigurationParser.java | 167 +++++++++++++++++++
.../common/util/TestACLConfigurationParser.java | 99 +++++++++++
.../java/org/apache/hadoop/hive/ql/Driver.java | 3 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 23 +++
.../hive/ql/exec/tez/TezSessionState.java | 27 +++
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 34 +++-
.../apache/hadoop/hive/ql/hooks/ATSHook.java | 2 +-
.../hadoop/hive/ql/exec/tez/TestTezTask.java | 6 +
9 files changed, 354 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 46be3fb..f0c129b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1462,6 +1462,8 @@ public class HiveConf extends Configuration {
HIVETEZLOGLEVEL("hive.tez.log.level", "INFO",
"The log level to use for tasks executing as part of the DAG.\n" +
"Used only if hive.tez.java.opts is used to configure Java options."),
+ HIVETEZHS2USERACCESS("hive.tez.hs2.user.access", true,
+ "Whether to grant access to the hs2/hive user for queries"),
HIVEQUERYNAME ("hive.query.name", null,
"This named is used by Tez to set the dag name. This name in turn will appear on \n" +
"the Tez UI representing the work that was done."),
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/common/src/java/org/apache/hive/common/util/ACLConfigurationParser.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hive/common/util/ACLConfigurationParser.java b/common/src/java/org/apache/hive/common/util/ACLConfigurationParser.java
new file mode 100644
index 0000000..cb1eaf9
--- /dev/null
+++ b/common/src/java/org/apache/hive/common/util/ACLConfigurationParser.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import java.util.Collections;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.Sets;
+
+/**
+ * Parser for extracting ACL information from Configs
+ */
+@Private
+public class ACLConfigurationParser {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(ACLConfigurationParser.class);
+
+ private static final String WILDCARD_ACL_VALUE = "*";
+ private static final Pattern splitPattern = Pattern.compile("\\s+");
+
+ private final Set<String> allowedUsers;
+ private final Set<String> allowedGroups;
+
+ public ACLConfigurationParser(Configuration conf, String confPropertyName) {
+ allowedUsers = Sets.newLinkedHashSet();
+ allowedGroups = Sets.newLinkedHashSet();
+ parse(conf, confPropertyName);
+ }
+
+
+ private boolean isWildCard(String aclStr) {
+ return aclStr.trim().equals(WILDCARD_ACL_VALUE);
+ }
+
+ private void parse(Configuration conf, String configProperty) {
+ String aclsStr = conf.get(configProperty);
+ if (aclsStr == null || aclsStr.isEmpty()) {
+ return;
+ }
+ if (isWildCard(aclsStr)) {
+ allowedUsers.add(WILDCARD_ACL_VALUE);
+ return;
+ }
+
+ final String[] splits = splitPattern.split(aclsStr);
+ int counter = -1;
+ String userListStr = null;
+ String groupListStr = null;
+ for (String s : splits) {
+ if (s.isEmpty()) {
+ if (userListStr != null) {
+ continue;
+ }
+ }
+ ++counter;
+ if (counter == 0) {
+ userListStr = s;
+ } else if (counter == 1) {
+ groupListStr = s;
+ } else {
+ LOG.warn("Invalid configuration specified for " + configProperty
+ + ", ignoring configured ACLs, value=" + aclsStr);
+ return;
+ }
+ }
+
+ if (userListStr == null) {
+ return;
+ }
+ if (userListStr.length() >= 1) {
+ allowedUsers.addAll(
+ org.apache.hadoop.util.StringUtils.getTrimmedStringCollection(userListStr));
+ }
+ if (groupListStr != null && groupListStr.length() >= 1) {
+ allowedGroups.addAll(
+ org.apache.hadoop.util.StringUtils.getTrimmedStringCollection(groupListStr));
+ }
+ }
+
+ public Set<String> getAllowedUsers() {
+ return Collections.unmodifiableSet(allowedUsers);
+ }
+
+ public Set<String> getAllowedGroups() {
+ return Collections.unmodifiableSet(allowedGroups);
+ }
+
+ public void addAllowedUser(String user) {
+ if (StringUtils.isBlank(user)) {
+ return;
+ }
+ if (allowedUsers.contains(WILDCARD_ACL_VALUE)) {
+ return;
+ }
+ if (user.equals(WILDCARD_ACL_VALUE)) {
+ allowedUsers.clear();
+ allowedGroups.clear();
+ }
+ allowedUsers.add(user);
+ }
+
+ public void addAllowedGroup(String group) {
+ allowedGroups.add(group);
+ }
+
+ public String toAclString() {
+ return toString();
+ }
+
+ @Override
+ public String toString() {
+ if (getAllowedUsers().contains(WILDCARD_ACL_VALUE)) {
+ return WILDCARD_ACL_VALUE;
+ } else {
+ if (allowedUsers.size() == 0 && allowedGroups.size() == 0) {
+ return " ";
+ }
+ String userString = constructCsv(allowedUsers);
+ String groupString = "";
+ if (allowedGroups.size() > 0) {
+ groupString = " " + constructCsv(allowedGroups);
+ }
+ return userString + groupString;
+ }
+ }
+
+ private String constructCsv(Set<String> inSet) {
+ StringBuilder sb = new StringBuilder();
+ if (inSet != null) {
+ boolean isFirst = true;
+ for (String s : inSet) {
+ if (!isFirst) {
+ sb.append(",");
+ } else {
+ isFirst = false;
+ }
+ sb.append(s);
+ }
+ }
+ return sb.toString();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/common/src/test/org/apache/hive/common/util/TestACLConfigurationParser.java
----------------------------------------------------------------------
diff --git a/common/src/test/org/apache/hive/common/util/TestACLConfigurationParser.java b/common/src/test/org/apache/hive/common/util/TestACLConfigurationParser.java
new file mode 100644
index 0000000..f23573f
--- /dev/null
+++ b/common/src/test/org/apache/hive/common/util/TestACLConfigurationParser.java
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+public class TestACLConfigurationParser {
+
+
+ @Test (timeout = 10_000L)
+ public void test() {
+
+ ACLConfigurationParser aclConf;
+ Configuration conf = new Configuration();
+ conf.set("ACL_ALL_ACCESS", "*");
+ aclConf = new ACLConfigurationParser(conf, "ACL_ALL_ACCESS");
+ assertEquals(1, aclConf.getAllowedUsers().size());
+ assertTrue(aclConf.getAllowedUsers().contains("*"));
+ assertEquals(0, aclConf.getAllowedGroups().size());
+ assertEquals("*", aclConf.toAclString());
+
+ conf.set("ACL_INVALID1", "u1, u2, u3");
+ aclConf = new ACLConfigurationParser(conf, "ACL_INVALID1");
+ assertEquals(0, aclConf.getAllowedUsers().size());
+ assertEquals(0, aclConf.getAllowedGroups().size());
+ assertEquals(" ", aclConf.toAclString());
+
+ conf.set("ACL_NONE", " ");
+ aclConf = new ACLConfigurationParser(conf, "ACL_NONE");
+ assertEquals(0, aclConf.getAllowedUsers().size());
+ assertEquals(0, aclConf.getAllowedGroups().size());
+ assertEquals(" ", aclConf.toAclString());
+
+ conf.set("ACL_VALID1", "user1,user2");
+ aclConf = new ACLConfigurationParser(conf, "ACL_VALID1");
+ assertEquals(2, aclConf.getAllowedUsers().size());
+ assertTrue(aclConf.getAllowedUsers().contains("user1"));
+ assertTrue(aclConf.getAllowedUsers().contains("user2"));
+ assertEquals(0, aclConf.getAllowedGroups().size());
+ assertEquals("user1,user2", aclConf.toAclString());
+
+ conf.set("ACL_VALID2", "user1,user2 group1,group2");
+ aclConf = new ACLConfigurationParser(conf, "ACL_VALID2");
+ assertEquals(2, aclConf.getAllowedUsers().size());
+ assertTrue(aclConf.getAllowedUsers().contains("user1"));
+ assertTrue(aclConf.getAllowedUsers().contains("user2"));
+ assertEquals(2, aclConf.getAllowedGroups().size());
+ assertTrue(aclConf.getAllowedGroups().contains("group1"));
+ assertTrue(aclConf.getAllowedGroups().contains("group2"));
+ assertEquals("user1,user2 group1,group2", aclConf.toAclString());
+
+
+ conf.set("ACL_VALID3", "user1 group1");
+ aclConf = new ACLConfigurationParser(conf, "ACL_VALID3");
+ assertEquals(1, aclConf.getAllowedUsers().size());
+ assertTrue(aclConf.getAllowedUsers().contains("user1"));
+ assertEquals(1, aclConf.getAllowedGroups().size());
+ assertTrue(aclConf.getAllowedGroups().contains("group1"));
+ assertEquals("user1 group1", aclConf.toAclString());
+
+ aclConf.addAllowedUser("user2");
+ assertEquals(2, aclConf.getAllowedUsers().size());
+ assertTrue(aclConf.getAllowedUsers().contains("user1"));
+ assertTrue(aclConf.getAllowedUsers().contains("user2"));
+ assertEquals("user1,user2 group1", aclConf.toAclString());
+
+ aclConf.addAllowedGroup("group2");
+ assertEquals(2, aclConf.getAllowedGroups().size());
+ assertTrue(aclConf.getAllowedGroups().contains("group1"));
+ assertTrue(aclConf.getAllowedGroups().contains("group2"));
+ assertEquals("user1,user2 group1,group2", aclConf.toAclString());
+
+ aclConf.addAllowedUser("*");
+ assertEquals(1, aclConf.getAllowedUsers().size());
+ assertTrue(aclConf.getAllowedUsers().contains("*"));
+ assertTrue(aclConf.getAllowedGroups().isEmpty());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 592b1f1..cdf24d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -1747,7 +1747,8 @@ public class Driver implements CommandProcessor {
resStream = null;
SessionState ss = SessionState.get();
- hookContext = new HookContext(plan, queryState, ctx.getPathToCS(), ss.getUserName(),
+
+ hookContext = new HookContext(plan, queryState, ctx.getPathToCS(), ss.getUserFromAuthenticator(),
ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId,
ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger);
hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index e81cbce..3484493 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -143,6 +143,7 @@ import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Shell;
+import org.apache.hive.common.util.ACLConfigurationParser;
import org.apache.hive.common.util.ReflectionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -3794,4 +3795,26 @@ public final class Utilities {
String suffix = "KMGTPE".charAt(exp-1) + "";
return String.format("%.2f%sB", bytes / Math.pow(unit, exp), suffix);
}
+
+
+ public static String getAclStringWithHiveModification(Configuration tezConf,
+ String propertyName,
+ boolean addHs2User,
+ String user,
+ String hs2User) throws
+ IOException {
+
+ // Start with initial ACLs
+ ACLConfigurationParser aclConf =
+ new ACLConfigurationParser(tezConf, propertyName);
+
+ // Always give access to the user
+ aclConf.addAllowedUser(user);
+
+ // Give access to the process user if the config is set.
+ if (addHs2User && hs2User != null) {
+ aclConf.addAllowedUser(hs2User);
+ }
+ return aclConf.toAclString();
+ }
}
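
A usage sketch for the helper above (the user names, group and pre-configured ACL are invented; the property name is Tez's tez.am.view-acls, and Hive's ql jar is assumed on the classpath): starting from a configured ACL of "admin ops", the query user and, when enabled, the HS2 login user are appended to the user list.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class AclStringExample {
  public static void main(String[] args) throws IOException {
    Configuration tezConf = new Configuration(false);
    // Pre-configured ACL: user "admin", group "ops" (invented values).
    tezConf.set("tez.am.view-acls", "admin ops");

    String viewAcls = Utilities.getAclStringWithHiveModification(
        tezConf, "tez.am.view-acls", /* addHs2User */ true,
        /* user */ "alice", /* hs2User */ "hive");

    // Per ACLConfigurationParser.toString(): users and groups are
    // space-separated, members within each list comma-separated.
    System.out.println(viewAcls); // -> "admin,alice,hive ops"
  }
}
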
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index 62f65c2..ed1ba9c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -308,6 +308,8 @@ public class TezSessionState {
tezConfig.setInt(TezConfiguration.TEZ_AM_SESSION_MIN_HELD_CONTAINERS, n);
}
+ setupSessionAcls(tezConfig, conf);
+
final TezClient session = TezClient.newBuilder("HIVE-" + sessionId, tezConfig)
.setIsSession(true).setLocalResources(commonLocalResources)
.setCredentials(llapCredentials).setServicePluginDescriptor(servicePluginsDescriptor)
@@ -433,6 +435,31 @@ public class TezSessionState {
}
}
+ private void setupSessionAcls(Configuration tezConf, HiveConf hiveConf) throws
+ IOException {
+
+ String user = SessionState.getUserFromAuthenticator();
+ UserGroupInformation loginUserUgi = UserGroupInformation.getLoginUser();
+ String loginUser =
+ loginUserUgi == null ? null : loginUserUgi.getShortUserName();
+ boolean addHs2User =
+ HiveConf.getBoolVar(hiveConf, ConfVars.HIVETEZHS2USERACCESS);
+
+ String viewStr = Utilities.getAclStringWithHiveModification(tezConf,
+ TezConfiguration.TEZ_AM_VIEW_ACLS, addHs2User, user, loginUser);
+ String modifyStr = Utilities.getAclStringWithHiveModification(tezConf,
+ TezConfiguration.TEZ_AM_MODIFY_ACLS, addHs2User, user, loginUser);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(
+ "Setting Tez Session access for sessionId={} with viewAclString={}, modifyStr={}",
+ SessionState.get().getSessionId(), viewStr, modifyStr);
+ }
+
+ tezConf.set(TezConfiguration.TEZ_AM_VIEW_ACLS, viewStr);
+ tezConf.set(TezConfiguration.TEZ_AM_MODIFY_ACLS, modifyStr);
+ }
+
public void refreshLocalResourcesFromConf(HiveConf conf)
throws IOException, LoginException, IllegalArgumentException, URISyntaxException, TezException {
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 58f0b33..740e41b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -33,6 +33,7 @@ import java.util.Set;
import javax.annotation.Nullable;
import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.metrics.common.Metrics;
@@ -57,6 +58,7 @@ import org.apache.hadoop.hive.ql.plan.UnionWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.LocalResource;
@@ -71,6 +73,7 @@ import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
import org.apache.tez.dag.api.GroupInputEdge;
import org.apache.tez.dag.api.SessionNotRunning;
+import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezException;
import org.apache.tez.dag.api.Vertex;
import org.apache.tez.dag.api.VertexGroup;
@@ -348,7 +351,7 @@ public class TezTask extends Task<TezWork> {
dag.setDAGInfo(dagInfo);
dag.setCredentials(conf.getCredentials());
- setAccessControlsForCurrentUser(dag);
+ setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
for (BaseWork w: ws) {
@@ -431,14 +434,31 @@ public class TezTask extends Task<TezWork> {
return dag;
}
- public static void setAccessControlsForCurrentUser(DAG dag) {
- // get current user
- String currentUser = SessionState.getUserFromAuthenticator();
- if(LOG.isDebugEnabled()) {
- LOG.debug("Setting Tez DAG access for " + currentUser);
+ private static void setAccessControlsForCurrentUser(DAG dag, String queryId,
+ Configuration conf) throws
+ IOException {
+ String user = SessionState.getUserFromAuthenticator();
+ UserGroupInformation loginUserUgi = UserGroupInformation.getLoginUser();
+ String loginUser =
+ loginUserUgi == null ? null : loginUserUgi.getShortUserName();
+ boolean addHs2User =
+ HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVETEZHS2USERACCESS);
+
+ // Temporarily re-using the TEZ AM View ACLs property for individual dag access control.
+ // Hive may want to set up its own parameters if it wants to control per dag access.
+ // Setting the tez-property per dag should work for now.
+
+ String viewStr = Utilities.getAclStringWithHiveModification(conf,
+ TezConfiguration.TEZ_AM_VIEW_ACLS, addHs2User, user, loginUser);
+ String modifyStr = Utilities.getAclStringWithHiveModification(conf,
+ TezConfiguration.TEZ_AM_MODIFY_ACLS, addHs2User, user, loginUser);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Setting Tez DAG access for queryId={} with viewAclString={}, modifyStr={}",
+ queryId, viewStr, modifyStr);
}
// set permissions for current user on DAG
- DAGAccessControls ac = new DAGAccessControls(currentUser, currentUser);
+ DAGAccessControls ac = new DAGAccessControls(viewStr, modifyStr);
dag.setAccessControls(ac);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 72a1acc..13ccd93 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -162,7 +162,7 @@ public class ATSHook implements ExecuteWithHookContext {
String queryId = plan.getQueryId();
String opId = hookContext.getOperationId();
long queryStartTime = plan.getQueryStartTime();
- String user = hookContext.getUgi().getUserName();
+ String user = hookContext.getUgi().getShortUserName();
String requestuser = hookContext.getUserName();
if (hookContext.getUserName() == null ){
requestuser = hookContext.getUgi().getUserName() ;
http://git-wip-us.apache.org/repos/asf/hive/blob/657236ee/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
index 5c012f3..2b52056 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
@@ -36,6 +37,7 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -44,6 +46,7 @@ import org.apache.hadoop.hive.common.metrics.common.Metrics;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
@@ -161,6 +164,9 @@ public class TestTezTask {
task = new TezTask(utils);
task.setWork(work);
task.setConsole(mock(LogHelper.class));
+ QueryPlan mockQueryPlan = mock(QueryPlan.class);
+ doReturn(UUID.randomUUID().toString()).when(mockQueryPlan).getQueryId();
+ task.setQueryPlan(mockQueryPlan);
conf = new JobConf();
appLr = mock(LocalResource.class);
[29/50] [abbrv] hive git commit: HIVE-15959 : LLAP: fix headroom
calculation and move it to daemon (Sergey Shelukhin,
reviewed by Siddharth Seth)
Posted by se...@apache.org.
HIVE-15959 : LLAP: fix headroom calculation and move it to daemon (Sergey Shelukhin, reviewed by Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de532b1f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de532b1f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de532b1f
Branch: refs/heads/hive-14535
Commit: de532b1f9bb21daa668dac0f2b4f2429c9b4bd37
Parents: af606ff
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Feb 21 13:56:17 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Feb 21 13:56:17 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 12 ++--
.../hadoop/hive/llap/cli/LlapServiceDriver.java | 64 ++++++++------------
.../hive/llap/daemon/impl/LlapDaemon.java | 49 +++++++++++----
llap-server/src/main/resources/package.py | 8 +--
.../hive/llap/daemon/MiniLlapCluster.java | 2 +-
5 files changed, 72 insertions(+), 63 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/de532b1f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 1af59ba..4faaa8b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -381,7 +381,7 @@ public class HiveConf extends Configuration {
llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname);
llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_PORT.varname);
llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname);
- llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB.varname);
+ llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_XMX_HEADROOM.varname);
llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname);
llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_FILE_CLEANER_THREADS.varname);
llapDaemonVarsSetLocal.add(ConfVars.LLAP_FILE_CLEANUP_DELAY_SECONDS.varname);
@@ -3072,11 +3072,11 @@ public class HiveConf extends Configuration {
LLAP_DAEMON_MEMORY_PER_INSTANCE_MB("hive.llap.daemon.memory.per.instance.mb", 4096,
"The total amount of memory to use for the executors inside LLAP (in megabytes).",
"llap.daemon.memory.per.instance.mb"),
- LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB("hive.llap.daemon.headroom.memory.per.instance.mb", 512,
- "The total amount of memory deducted from daemon memory required for other LLAP services. The remaining memory" +
- " will be used by the executors. If the cache is off-heap, Executor memory + Headroom memory = Xmx. If the " +
- "cache is on-heap, Executor memory + Cache memory + Headroom memory = Xmx. The headroom memory has to be " +
- "minimum of 5% from the daemon memory."),
+ LLAP_DAEMON_XMX_HEADROOM("hive.llap.daemon.xmx.headroom", "5%",
+ "The total amount of heap memory set aside by LLAP and not used by the executors. Can\n" +
+ "be specified as size (e.g. '512Mb'), or percentage (e.g. '5%'). Note that the latter is\n" +
+ "derived from the total daemon XMX, which can be different from the total executor\n" +
+ "memory if the cache is on-heap; although that's not the default configuration."),
LLAP_DAEMON_VCPUS_PER_INSTANCE("hive.llap.daemon.vcpus.per.instance", 4,
"The total number of vcpus to use for the executors inside LLAP.",
"llap.daemon.vcpus.per.instance"),
http://git-wip-us.apache.org/repos/asf/hive/blob/de532b1f/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
index a93d53a..e8517ab 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.llap.cli;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
@@ -48,17 +47,10 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.llap.LlapUtil;
import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration;
import org.apache.hadoop.hive.llap.daemon.impl.LlapConstants;
-import org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon;
import org.apache.hadoop.hive.llap.daemon.impl.StaticPermanentFunctionChecker;
import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos;
import org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils;
import org.apache.hadoop.registry.client.binding.RegistryUtils;
-import org.apache.slider.client.SliderClient;
-import org.apache.slider.common.params.ActionCreateArgs;
-import org.apache.slider.common.params.ActionDestroyArgs;
-import org.apache.slider.common.params.ActionFreezeArgs;
-import org.apache.slider.common.params.ActionInstallPackageArgs;
-import org.apache.slider.core.exceptions.UnknownApplicationInstanceException;
import org.apache.tez.dag.api.TezConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -84,7 +76,6 @@ import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YarnException;
import org.eclipse.jetty.server.ssl.SslSocketConnector;
import org.joda.time.DateTime;
import org.json.JSONException;
@@ -244,6 +235,7 @@ public class LlapServiceDriver {
HiveConf.setVar(conf, ConfVars.LLAP_DAEMON_LOGGER, options.getLogger());
propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_LOGGER.varname, options.getLogger());
}
+ boolean isDirect = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT);
if (options.getSize() != -1) {
if (options.getCache() != -1) {
@@ -263,8 +255,7 @@ public class LlapServiceDriver {
+ " smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize())
+ ")");
}
- if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT)
- && false == HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED)) {
+ if (isDirect && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED)) {
// direct and not memory mapped
Preconditions.checkArgument(options.getXmx() + options.getCache() <= options.getSize(),
"Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") + cache size ("
@@ -273,19 +264,6 @@ public class LlapServiceDriver {
}
}
- // This parameter is read in package.py - and nowhere else. Does not need to be part of
- // HiveConf - that's just confusing.
- final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
- long containerSize = -1;
- if (options.getSize() != -1) {
- containerSize = options.getSize() / (1024 * 1024);
- Preconditions.checkArgument(containerSize >= minAlloc, "Container size ("
- + LlapUtil.humanReadableByteCount(options.getSize()) + ") should be greater"
- + " than minimum allocation(" + LlapUtil.humanReadableByteCount(minAlloc * 1024L * 1024L) + ")");
- conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
- propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
- String.valueOf(containerSize));
- }
if (options.getExecutors() != -1) {
conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
@@ -319,17 +297,30 @@ public class LlapServiceDriver {
String.valueOf(xmxMb));
}
- final long currentHeadRoom = options.getSize() - options.getXmx() - options.getCache();
- final long minHeadRoom = (long) (options.getXmx() * LlapDaemon.MIN_HEADROOM_PERCENT);
- final long headRoom = currentHeadRoom < minHeadRoom ? minHeadRoom : currentHeadRoom;
- final long headRoomMb = headRoom / (1024L * 1024L);
- conf.setLong(ConfVars.LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB.varname, headRoomMb);
- propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB.varname,
- String.valueOf(headRoomMb));
-
- LOG.info("Memory settings: container memory: {} executor memory: {} cache memory: {} headroom memory: {}",
- LlapUtil.humanReadableByteCount(options.getSize()), LlapUtil.humanReadableByteCount(options.getXmx()),
- LlapUtil.humanReadableByteCount(options.getCache()), LlapUtil.humanReadableByteCount(headRoom));
+ long size = options.getSize();
+ if (size == -1) {
+ long heapSize = xmx;
+ if (!isDirect) {
+ heapSize += cache;
+ }
+ size = Math.min((long)(heapSize * 1.2), heapSize + 1024L*1024*1024);
+ if (isDirect) {
+ size += cache;
+ }
+ }
+ long containerSize = size / (1024 * 1024);
+ final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
+ Preconditions.checkArgument(containerSize >= minAlloc, "Container size ("
+ + LlapUtil.humanReadableByteCount(options.getSize()) + ") should be greater"
+ + " than minimum allocation(" + LlapUtil.humanReadableByteCount(minAlloc * 1024L * 1024L) + ")");
+ conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
+ propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname,
+ String.valueOf(containerSize));
+
+ LOG.info("Memory settings: container memory: {} executor memory: {} cache memory: {}",
+ LlapUtil.humanReadableByteCount(options.getSize()),
+ LlapUtil.humanReadableByteCount(options.getXmx()),
+ LlapUtil.humanReadableByteCount(options.getCache()));
if (options.getLlapQueueName() != null && !options.getLlapQueueName().isEmpty()) {
conf.set(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
@@ -642,9 +633,6 @@ public class LlapServiceDriver {
configs.put(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname,
HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB));
- configs.put(ConfVars.LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB.varname,
- HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB));
-
configs.put(ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname,
HiveConf.getIntVar(conf, ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE));
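Taken together, the driver now derives the container size itself when -size is not given: heap is Xmx (plus cache if the cache is on-heap), padded by the smaller of 20% or 1 GB, with a direct cache added on top. A self-contained sketch of that derivation under assumed inputs (4 GB Xmx, 2 GB direct cache; the numbers are illustrative):

    // Mirrors the size computation added above.
    long xmx = 4096L * 1024 * 1024;
    long cache = 2048L * 1024 * 1024;
    boolean isDirect = true;
    long heapSize = xmx + (isDirect ? 0 : cache);
    long size = Math.min((long) (heapSize * 1.2), heapSize + 1024L * 1024 * 1024);
    if (isDirect) {
      size += cache;
    }
    // heapSize = 4096 MB; size = min(~4915 MB, 5120 MB) + 2048 MB cache ~= 6963 MB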
http://git-wip-us.apache.org/repos/asf/hive/blob/de532b1f/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
index e737fdd..fc9f530 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java
@@ -83,7 +83,6 @@ import com.google.common.primitives.Ints;
public class LlapDaemon extends CompositeService implements ContainerRunner, LlapDaemonMXBean {
private static final Logger LOG = LoggerFactory.getLogger(LlapDaemon.class);
- public static final double MIN_HEADROOM_PERCENT = 0.05;
private final Configuration shuffleHandlerConf;
private final SecretManager secretManager;
@@ -114,7 +113,7 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
public LlapDaemon(Configuration daemonConf, int numExecutors, long executorMemoryBytes,
boolean ioEnabled, boolean isDirectCache, long ioMemoryBytes, String[] localDirs, int srvPort,
- int mngPort, int shufflePort, int webPort, String appName, final long headRoomBytes) {
+ int mngPort, int shufflePort, int webPort, String appName) {
super("LlapDaemon");
printAsciiArt();
@@ -158,11 +157,9 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
this.maxJvmMemory = getTotalHeapSize();
this.llapIoEnabled = ioEnabled;
- Preconditions.checkArgument(headRoomBytes < executorMemoryBytes, "LLAP daemon headroom size should be less " +
- "than daemon max memory size. headRoomBytes: " + headRoomBytes + " executorMemoryBytes: " + executorMemoryBytes);
- final long minHeadRoomBytes = (long) (executorMemoryBytes * MIN_HEADROOM_PERCENT);
- final long headroom = headRoomBytes < minHeadRoomBytes ? minHeadRoomBytes : headRoomBytes;
- this.executorMemoryPerInstance = executorMemoryBytes - headroom;
+
+ long xmxHeadRoomBytes = determineXmxHeadroom(daemonConf, executorMemoryBytes, maxJvmMemory);
+ this.executorMemoryPerInstance = executorMemoryBytes - xmxHeadRoomBytes;
this.ioMemoryPerInstance = ioMemoryBytes;
this.numExecutors = numExecutors;
this.localDirs = localDirs;
@@ -173,11 +170,14 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
boolean enablePreemption = HiveConf.getBoolVar(
daemonConf, ConfVars.LLAP_DAEMON_TASK_SCHEDULER_ENABLE_PREEMPTION);
LOG.warn("Attempting to start LlapDaemonConf with the following configuration: " +
- "maxJvmMemory=" + maxJvmMemory + " (" + LlapUtil.humanReadableByteCount(maxJvmMemory) + ")" +
+ "maxJvmMemory=" + maxJvmMemory + " ("
+ + LlapUtil.humanReadableByteCount(maxJvmMemory) + ")" +
", requestedExecutorMemory=" + executorMemoryBytes +
" (" + LlapUtil.humanReadableByteCount(executorMemoryBytes) + ")" +
- ", llapIoCacheSize=" + ioMemoryBytes + " (" + LlapUtil.humanReadableByteCount(ioMemoryBytes) + ")" +
- ", headRoomMemory=" + headroom + " (" + LlapUtil.humanReadableByteCount(headroom) + ")" +
+ ", llapIoCacheSize=" + ioMemoryBytes + " ("
+ + LlapUtil.humanReadableByteCount(ioMemoryBytes) + ")" +
+ ", xmxHeadRoomMemory=" + xmxHeadRoomBytes + " ("
+ + LlapUtil.humanReadableByteCount(xmxHeadRoomBytes) + ")" +
", adjustedExecutorMemory=" + executorMemoryPerInstance +
" (" + LlapUtil.humanReadableByteCount(executorMemoryPerInstance) + ")" +
", numExecutors=" + numExecutors +
@@ -293,6 +293,30 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
addIfService(amReporter);
}
+ private static long determineXmxHeadroom(
+ Configuration daemonConf, long executorMemoryBytes, long maxJvmMemory) {
+ String headroomStr = HiveConf.getVar(daemonConf, ConfVars.LLAP_DAEMON_XMX_HEADROOM).trim();
+ long xmxHeadRoomBytes = Long.MAX_VALUE;
+ try {
+ if (headroomStr.endsWith("%")) {
+ long percentage = Integer.parseInt(headroomStr.substring(0, headroomStr.length() - 1));
+ Preconditions.checkState(percentage >= 0 && percentage < 100,
+ "Headroom percentage should be in [0, 100) range; found " + headroomStr);
+ xmxHeadRoomBytes = maxJvmMemory * percentage / 100L;
+ } else {
+ xmxHeadRoomBytes = HiveConf.toSizeBytes(headroomStr);
+ }
+ } catch (NumberFormatException ex) {
+ throw new RuntimeException("Invalid headroom configuration " + headroomStr);
+ }
+
+ Preconditions.checkArgument(xmxHeadRoomBytes < executorMemoryBytes,
+ "LLAP daemon headroom size should be less than daemon max memory size. headRoomBytes: "
+ + xmxHeadRoomBytes + " executorMemoryBytes: " + executorMemoryBytes + " (derived from "
+ + headroomStr + " out of xmx of " + maxJvmMemory + ")");
+ return xmxHeadRoomBytes;
+ }
+
private static void initializeLogging(final Configuration conf) {
long start = System.currentTimeMillis();
URL llap_l4j2 = LlapDaemon.class.getClassLoader().getResource(
@@ -467,15 +491,14 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, Lla
int webPort = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_WEB_PORT);
long executorMemoryBytes = HiveConf.getIntVar(
daemonConf, ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB) * 1024l * 1024l;
- long headroomBytes = HiveConf.getIntVar(
- daemonConf, ConfVars.LLAP_DAEMON_HEADROOM_MEMORY_PER_INSTANCE_MB) * 1024l * 1024l;
long ioMemoryBytes = HiveConf.getSizeVar(daemonConf, ConfVars.LLAP_IO_MEMORY_MAX_SIZE);
boolean isDirectCache = HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_ALLOCATOR_DIRECT);
boolean isLlapIo = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_IO_ENABLED, true);
+
LlapDaemon.initializeLogging(daemonConf);
llapDaemon = new LlapDaemon(daemonConf, numExecutors, executorMemoryBytes, isLlapIo,
isDirectCache, ioMemoryBytes, localDirs, rpcPort, mngPort, shufflePort, webPort,
- appName, headroomBytes);
+ appName);
LOG.info("Adding shutdown hook for LlapDaemon");
ShutdownHookManager.addShutdownHook(new CompositeServiceShutdownHook(llapDaemon), 1);
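To make determineXmxHeadroom concrete, a worked pass of the percentage branch under assumed inputs (4 GB of Xmx and the default "5%"; exception handling elided):

    // Percentage branch: 5% of a 4 GB Xmx is about 205 MB of headroom.
    long maxJvmMemory = 4096L * 1024 * 1024;
    String headroomStr = "5%";
    long percentage = Integer.parseInt(headroomStr.substring(0, headroomStr.length() - 1));
    long xmxHeadRoomBytes = maxJvmMemory * percentage / 100L; // ~205 MB
    // Size branch: a value like "512Mb" would instead go through
    // HiveConf.toSizeBytes and yield 512 MB of headroom.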
http://git-wip-us.apache.org/repos/asf/hive/blob/de532b1f/llap-server/src/main/resources/package.py
----------------------------------------------------------------------
diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py
index 66648b6..8a378ef 100644
--- a/llap-server/src/main/resources/package.py
+++ b/llap-server/src/main/resources/package.py
@@ -20,17 +20,15 @@ class LlapResource(object):
# convert to Mb
self.cache = config["hive.llap.io.memory.size"] / (1024*1024.0)
self.direct = config["hive.llap.io.allocator.direct"]
- self.min_mb = -1
self.min_cores = -1
# compute heap + cache as final Xmx
h = self.memory
if (not self.direct):
h += self.cache
if size == -1:
- c = min(h*1.2, h + 1024) # + 1024 or 20%
- c += (self.direct and self.cache) or 0
- if self.min_mb > 0:
- c = c + c%self.min_mb
+ print "Cannot determine the container size"
+ sys.exit(1)
+ return
else:
# do not mess with user input
c = size
http://git-wip-us.apache.org/repos/asf/hive/blob/de532b1f/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/MiniLlapCluster.java
----------------------------------------------------------------------
diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/MiniLlapCluster.java b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/MiniLlapCluster.java
index a9b23b6..06f6dac 100644
--- a/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/MiniLlapCluster.java
+++ b/llap-server/src/test/org/apache/hadoop/hive/llap/daemon/MiniLlapCluster.java
@@ -164,7 +164,7 @@ public class MiniLlapCluster extends AbstractService {
LOG.info("Initializing {} llap instances for MiniLlapCluster with name={}", numInstances, clusterNameTrimmed);
for (int i = 0 ;i < numInstances ; i++) {
llapDaemons[i] = new LlapDaemon(conf, numExecutorsPerService, execBytesPerService, llapIoEnabled,
- ioIsDirect, ioBytesPerService, localDirs, rpcPort, mngPort, shufflePort, webPort, clusterNameTrimmed, 0);
+ ioIsDirect, ioBytesPerService, localDirs, rpcPort, mngPort, shufflePort, webPort, clusterNameTrimmed);
llapDaemons[i].init(new Configuration(conf));
}
LOG.info("Initialized {} llap instances for MiniLlapCluster with name={}", numInstances, clusterNameTrimmed);
[20/50] [abbrv] hive git commit: HIVE-15973 : Make
interval_arithmetic.q test robust
Posted by se...@apache.org.
HIVE-15973 : Make interval_arithmetic.q test robust
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1e00fb35
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1e00fb35
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1e00fb35
Branch: refs/heads/hive-14535
Commit: 1e00fb357c305b29cada4819793eb9ad5e63cb55
Parents: 54977d2
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Sun Feb 19 17:47:58 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sun Feb 19 17:47:58 2017 -0800
----------------------------------------------------------------------
.../test/queries/clientpositive/interval_arithmetic.q | 4 ++--
.../results/clientpositive/interval_arithmetic.q.out | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1e00fb35/ql/src/test/queries/clientpositive/interval_arithmetic.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/interval_arithmetic.q b/ql/src/test/queries/clientpositive/interval_arithmetic.q
index 445cdfe..ae14579 100644
--- a/ql/src/test/queries/clientpositive/interval_arithmetic.q
+++ b/ql/src/test/queries/clientpositive/interval_arithmetic.q
@@ -160,6 +160,6 @@ from interval_arithmetic_1
limit 2;
explain
-select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1;
-select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1;
+select date '2016-11-08' + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1;
+select date '2016-11-08' + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1;
drop table interval_arithmetic_1;
http://git-wip-us.apache.org/repos/asf/hive/blob/1e00fb35/ql/src/test/results/clientpositive/interval_arithmetic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/interval_arithmetic.q.out b/ql/src/test/results/clientpositive/interval_arithmetic.q.out
index 039d3e1..eba97fa 100644
--- a/ql/src/test/results/clientpositive/interval_arithmetic.q.out
+++ b/ql/src/test/results/clientpositive/interval_arithmetic.q.out
@@ -607,10 +607,10 @@ POSTHOOK: Input: default@interval_arithmetic_1
109 20:30:40.246913578 89 02:14:26.000000000
109 20:30:40.246913578 89 02:14:26.000000000
PREHOOK: query: explain
-select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+select date '2016-11-08' + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+select date '2016-11-08' + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -624,7 +624,7 @@ STAGE PLANS:
alias: interval_arithmetic_1
Statistics: Num rows: 12288 Data size: 326837 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: 2017-02-20 03:04:00.0 (type: timestamp)
+ expressions: 2016-11-11 03:04:00.0 (type: timestamp)
outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE
Limit
@@ -632,15 +632,15 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
-PREHOOK: query: select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+PREHOOK: query: select date '2016-11-08' + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@interval_arithmetic_1
#### A masked pattern was here ####
-POSTHOOK: query: select current_date + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
+POSTHOOK: query: select date '2016-11-08' + interval '1 2:02:00' day to second + interval '2' day + interval '1' hour + interval '1' minute + interval '60' second from interval_arithmetic_1 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@interval_arithmetic_1
#### A masked pattern was here ####
-2017-02-20 03:04:00
+2016-11-11 03:04:00
PREHOOK: query: drop table interval_arithmetic_1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@interval_arithmetic_1
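Pinning the date makes the golden result verifiable by hand; stepping through the interval sum from the updated query:

    date '2016-11-08'
      + interval '1 2:02:00' day to second  ->  2016-11-09 02:02:00
      + interval '2' day                    ->  2016-11-11 02:02:00
      + interval '1' hour                   ->  2016-11-11 03:02:00
      + interval '1' minute                 ->  2016-11-11 03:03:00
      + interval '60' second                ->  2016-11-11 03:04:00

which matches the 2016-11-11 03:04:00 in the new .q.out, whereas the old current_date form produced a result that changed with the day the test ran.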
[37/50] [abbrv] hive git commit: HIVE-15991 : Flaky Test:
TestEncryptedHDFSCliDriver encryption_join_with_different_encryption_keys
(Sahil Takiar via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by se...@apache.org.
HIVE-15991 : Flaky Test: TestEncryptedHDFSCliDriver encryption_join_with_different_encryption_keys (Sahil Takiar via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ede8a558
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ede8a558
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ede8a558
Branch: refs/heads/hive-14535
Commit: ede8a5585d90f0306cc67e65e2f472c1fe351d12
Parents: 806d6e1
Author: Sahil Takiar <ta...@gmail.com>
Authored: Wed Feb 22 11:35:20 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Feb 22 11:35:20 2017 -0800
----------------------------------------------------------------------
...cryption_join_with_different_encryption_keys.q.out | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ede8a558/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out
index 7a91538..54f6b27 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_with_different_encryption_keys.q.out
@@ -1,12 +1,6 @@
-PREHOOK: query: --SORT_QUERY_RESULTS
-
--- Java JCE must be installed in order to hava a key length of 256 bits
-DROP TABLE IF EXISTS table_key_1 PURGE
+PREHOOK: query: DROP TABLE IF EXISTS table_key_1 PURGE
PREHOOK: type: DROPTABLE
-POSTHOOK: query: --SORT_QUERY_RESULTS
-
--- Java JCE must be installed in order to hava a key length of 256 bits
-DROP TABLE IF EXISTS table_key_1 PURGE
+POSTHOOK: query: DROP TABLE IF EXISTS table_key_1 PURGE
POSTHOOK: type: DROPTABLE
#### A masked pattern was here ####
PREHOOK: type: CREATETABLE
@@ -121,6 +115,7 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
+ column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
@@ -141,6 +136,7 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
+ column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
@@ -165,6 +161,7 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
+ column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
@@ -185,6 +182,7 @@ STAGE PLANS:
properties:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucket_count -1
+ column.name.delimiter ,
columns key,value
columns.comments
columns.types int:string
[12/50] [abbrv] hive git commit: HIVE-15489: Alternatively use table
scan stats for HoS (Chao Sun, reviewed by Xuefu Zhang)
Posted by se...@apache.org.
HIVE-15489: Alternatively use table scan stats for HoS (Chao Sun, reviewed by Xuefu Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/368d916b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/368d916b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/368d916b
Branch: refs/heads/hive-14535
Commit: 368d916b369f1adc58da884463b1dedb8c010616
Parents: bba1818
Author: Chao Sun <su...@apache.org>
Authored: Thu Jan 19 16:42:49 2017 -0800
Committer: Chao Sun <su...@apache.org>
Committed: Fri Feb 17 12:06:48 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 3 +
.../test/resources/testconfiguration.properties | 3 +-
.../hadoop/hive/ql/exec/OperatorUtils.java | 34 +++
.../SparkRemoveDynamicPruningBySize.java | 4 +-
.../optimizer/spark/SparkMapJoinOptimizer.java | 34 ++-
.../hive/ql/parse/spark/GenSparkUtils.java | 36 +--
.../hive/ql/parse/spark/SparkCompiler.java | 3 +-
.../spark_use_file_size_for_mapjoin.q | 30 +++
.../spark/spark_use_file_size_for_mapjoin.q.out | 257 +++++++++++++++++++
9 files changed, 364 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0bff243..1af59ba 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3193,6 +3193,9 @@ public class HiveConf extends Configuration {
Constants.LLAP_LOGGER_NAME_CONSOLE),
"logger used for llap-daemons."),
+ SPARK_USE_FILE_SIZE_FOR_MAPJOIN("hive.spark.use.file.size.for.mapjoin", false,
+ "If this is set to true, mapjoin optimization in Hive/Spark will use source file sizes associated"
+ + "with TableScan operator on the root of operator tree, instead of using operator statistics."),
SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout",
"60s", new TimeValidator(TimeUnit.SECONDS),
"Timeout for requests from Hive client to remote Spark driver."),
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e8db920..7c54275 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1481,7 +1481,8 @@ spark.query.files=add_part_multiple.q, \
spark.only.query.files=spark_combine_equivalent_work.q,\
spark_dynamic_partition_pruning.q,\
spark_dynamic_partition_pruning_2.q,\
- spark_vectorized_dynamic_partition_pruning.q
+ spark_vectorized_dynamic_partition_pruning.q,\
+ spark_use_file_size_for_mapjoin.q
miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
bucket4.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index d294e25..5bbfe12 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -346,4 +346,38 @@ public class OperatorUtils {
}
}
}
+
+ /**
+ * Given the input operator 'op', walk up the operator tree from 'op', and collect all the
+ * roots that can be reached from it. The results are stored in 'roots'.
+ */
+ public static void findRoots(Operator<?> op, Collection<Operator<?>> roots) {
+ List<Operator<?>> parents = op.getParentOperators();
+ if (parents == null || parents.isEmpty()) {
+ roots.add(op);
+ return;
+ }
+ for (Operator<?> p : parents) {
+ findRoots(p, roots);
+ }
+ }
+
+ /**
+ * Remove the branch that contains the specified operator. Do nothing if there's no branching,
+ * i.e. all the upstream operators have only one child.
+ */
+ public static void removeBranch(Operator<?> op) {
+ Operator<?> child = op;
+ Operator<?> curr = op;
+
+ while (curr.getChildOperators().size() <= 1) {
+ child = curr;
+ if (curr.getParentOperators() == null || curr.getParentOperators().isEmpty()) {
+ return;
+ }
+ curr = curr.getParentOperators().get(0);
+ }
+
+ curr.removeChild(child);
+ }
}
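A minimal usage sketch of the relocated helpers; joinOp and pruningSinkOp are hypothetical operators already wired into a plan, and imports from java.util and org.apache.hadoop.hive.ql.exec are assumed:

    // Collect every root (typically a TableScan) reachable upstream of a join.
    Set<Operator<?>> roots = new HashSet<>();
    OperatorUtils.findRoots(joinOp, roots);
    // Prune the branch that ends in the given operator, if the plan forks above it.
    OperatorUtils.removeBranch(pruningSinkOp);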
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkRemoveDynamicPruningBySize.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkRemoveDynamicPruningBySize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkRemoveDynamicPruningBySize.java
index c41a0c8..26a1088 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkRemoveDynamicPruningBySize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SparkRemoveDynamicPruningBySize.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimizer;
import java.util.Stack;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -28,7 +29,6 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.spark.GenSparkUtils;
import org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext;
import org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator;
@@ -54,7 +54,7 @@ public class SparkRemoveDynamicPruningBySize implements NodeProcessor {
if (desc.getStatistics().getDataSize() > context.getConf()
.getLongVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE)) {
- GenSparkUtils.removeBranch(op);
+ OperatorUtils.removeBranch(op);
// at this point we've found the fork in the op pipeline that has the pruning as a child plan.
LOG.info("Disabling dynamic pruning for: "
+ desc.getTableScan().getName()
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 7faff88..d8f37ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -24,6 +24,8 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -191,12 +193,40 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
int pos = 0;
// bigTableFound means we've encountered a table that's bigger than the
- // max. This table is either the the big table or we cannot convert.
+ // max. This table is either the big table or we cannot convert.
boolean bigTableFound = false;
+ boolean useTsStats = context.getConf().getBoolean(HiveConf.ConfVars.SPARK_USE_FILE_SIZE_FOR_MAPJOIN.varname, false);
+ boolean hasUpstreamSinks = false;
+ // Check whether there's any upstream RS.
+ // If so, don't use TS stats because they could be inaccurate.
for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
+ Set<ReduceSinkOperator> parentSinks =
+ OperatorUtils.findOperatorsUpstream(parentOp, ReduceSinkOperator.class);
+ parentSinks.remove(parentOp);
+ if (!parentSinks.isEmpty()) {
+ hasUpstreamSinks = true;
+ }
+ }
+
+ // If we are using TS stats and this JOIN has at least one upstream RS, disable MapJoin conversion.
+ if (useTsStats && hasUpstreamSinks) {
+ return new long[]{-1, 0, 0};
+ }
+
+ for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
+ Statistics currInputStat;
+ if (useTsStats) {
+ currInputStat = new Statistics();
+ // Find all root TSs and add up all data sizes
+ // Not adding other stats (e.g., # of rows, col stats) since only data size is used here
+ for (TableScanOperator root : OperatorUtils.findOperatorsUpstream(parentOp, TableScanOperator.class)) {
+ currInputStat.addToDataSize(root.getStatistics().getDataSize());
+ }
+ } else {
+ currInputStat = parentOp.getStatistics();
+ }
- Statistics currInputStat = parentOp.getStatistics();
if (currInputStat == null) {
LOG.warn("Couldn't get statistics from: " + parentOp);
return new long[]{-1, 0, 0};
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
index 7b2b3c0..36bde30 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
@@ -573,7 +574,7 @@ public class GenSparkUtils {
*/
public BaseWork getEnclosingWork(Operator<?> op, GenSparkProcContext procCtx) {
List<Operator<?>> ops = new ArrayList<Operator<?>>();
- findRoots(op, ops);
+ OperatorUtils.findRoots(op, ops);
for (Operator<?> r : ops) {
BaseWork work = procCtx.rootToWorkMap.get(r);
if (work != null) {
@@ -582,37 +583,4 @@ public class GenSparkUtils {
}
return null;
}
-
- /*
- * findRoots returns all root operators (in ops) that result in operator op
- */
- private void findRoots(Operator<?> op, List<Operator<?>> ops) {
- List<Operator<?>> parents = op.getParentOperators();
- if (parents == null || parents.isEmpty()) {
- ops.add(op);
- return;
- }
- for (Operator<?> p : parents) {
- findRoots(p, ops);
- }
- }
-
- /**
- * Remove the branch that contains the specified operator. Do nothing if there's no branching,
- * i.e. all the upstream operators have only one child.
- */
- public static void removeBranch(Operator<?> op) {
- Operator<?> child = op;
- Operator<?> curr = op;
-
- while (curr.getChildOperators().size() <= 1) {
- child = curr;
- if (curr.getParentOperators() == null || curr.getParentOperators().isEmpty()) {
- return;
- }
- curr = curr.getParentOperators().get(0);
- }
-
- curr.removeChild(child);
- }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 71528e8..c4b1640 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -169,7 +170,7 @@ public class SparkCompiler extends TaskCompiler {
return;
}
- GenSparkUtils.removeBranch(toRemove);
+ OperatorUtils.removeBranch(toRemove);
// at this point we've found the fork in the op pipeline that has the pruning as a child plan.
LOG.info("Disabling dynamic pruning for: "
+ toRemove.getConf().getTableScan().toString() + ". Needed to break cyclic dependency");
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/test/queries/clientpositive/spark_use_file_size_for_mapjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/spark_use_file_size_for_mapjoin.q b/ql/src/test/queries/clientpositive/spark_use_file_size_for_mapjoin.q
new file mode 100644
index 0000000..b623b83
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/spark_use_file_size_for_mapjoin.q
@@ -0,0 +1,30 @@
+set hive.mapred.mode=nonstrict;
+set hive.auto.convert.join=true;
+set hive.spark.use.file.size.for.mapjoin=true;
+set hive.auto.convert.join.noconditionaltask.size=4000;
+
+EXPLAIN
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97;
+
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97;
+
+set hive.auto.convert.join.noconditionaltask.size=8000;
+
+-- This is copied from auto_join2. Without the configuration both joins are mapjoins,
+-- but with the configuration on, Hive should not turn the second join into a mapjoin since it
+-- has an upstream reduce sink.
+
+CREATE TABLE dest(key INT, value STRING) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
+INSERT OVERWRITE TABLE dest SELECT src1.key, src3.value;
+
+FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
+INSERT OVERWRITE TABLE dest SELECT src1.key, src3.value;
+
+SELECT sum(hash(dest.key,dest.value)) FROM dest;
http://git-wip-us.apache.org/repos/asf/hive/blob/368d916b/ql/src/test/results/clientpositive/spark/spark_use_file_size_for_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_use_file_size_for_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/spark_use_file_size_for_mapjoin.q.out
new file mode 100644
index 0000000..9044140
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/spark_use_file_size_for_mapjoin.q.out
@@ -0,0 +1,257 @@
+PREHOOK: query: EXPLAIN
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) = 97.0) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) = 97.0) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+97 val_97
+97 val_97
+97 val_97
+97 val_97
+PREHOOK: query: CREATE TABLE dest(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest
+POSTHOOK: query: CREATE TABLE dest(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest
+PREHOOK: query: EXPLAIN
+FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
+INSERT OVERWRITE TABLE dest SELECT src1.key, src3.value
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
+INSERT OVERWRITE TABLE dest SELECT src1.key, src3.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-3
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
+ sort order: +
+ Map-reduce partition columns: (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Local Work:
+ Map Reduce Local Work
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col3 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
+INSERT OVERWRITE TABLE dest SELECT src1.key, src3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest
+POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
+INSERT OVERWRITE TABLE dest SELECT src1.key, src3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest
+POSTHOOK: Lineage: dest.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT sum(hash(dest.key,dest.value)) FROM dest
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(dest.key,dest.value)) FROM dest
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest
+#### A masked pattern was here ####
+33815990627
[07/50] [abbrv] hive git commit: HIVE-15936:
ConcurrentModificationException in ATSHook (Daniel Dai,
reviewed by Jason Dere)
Posted by se...@apache.org.
HIVE-15936: ConcurrentModificationException in ATSHook (Daniel Dai, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3485d02c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3485d02c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3485d02c
Branch: refs/heads/hive-14535
Commit: 3485d02cbfca5603e86e7b370da86b607003f337
Parents: 90688bd
Author: Daniel Dai <da...@hortonworks.com>
Authored: Fri Feb 17 00:22:54 2017 -0800
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Fri Feb 17 00:22:54 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/hooks/ATSHook.java | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3485d02c/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 55b922b..72a1acc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -143,6 +143,11 @@ public class ATSHook implements ExecuteWithHookContext {
final QueryState queryState = hookContext.getQueryState();
final String queryId = queryState.getQueryId();
+ final Map<String, Long> durations = new HashMap<String, Long>();
+ for (String key : hookContext.getPerfLogger().getEndTimes().keySet()) {
+ durations.put(key, hookContext.getPerfLogger().getDuration(key));
+ }
+
try {
setupAtsExecutor(conf);
@@ -203,10 +208,10 @@ public class ATSHook implements ExecuteWithHookContext {
tablesRead, tablesWritten, conf, llapId));
break;
case POST_EXEC_HOOK:
- fireAndForget(createPostHookEvent(queryId, currentTime, user, requestuser, true, opId, hookContext.getPerfLogger()));
+ fireAndForget(createPostHookEvent(queryId, currentTime, user, requestuser, true, opId, durations));
break;
case ON_FAILURE_HOOK:
- fireAndForget(createPostHookEvent(queryId, currentTime, user, requestuser , false, opId, hookContext.getPerfLogger()));
+ fireAndForget(createPostHookEvent(queryId, currentTime, user, requestuser , false, opId, durations));
break;
default:
//ignore
@@ -325,7 +330,7 @@ public class ATSHook implements ExecuteWithHookContext {
}
TimelineEntity createPostHookEvent(String queryId, long stopTime, String user, String requestuser, boolean success,
- String opId, PerfLogger perfLogger) throws Exception {
+ String opId, Map<String, Long> durations) throws Exception {
LOG.info("Received post-hook notification for :" + queryId);
TimelineEntity atsEntity = new TimelineEntity();
@@ -346,8 +351,8 @@ public class ATSHook implements ExecuteWithHookContext {
// Perf times
JSONObject perfObj = new JSONObject(new LinkedHashMap<>());
- for (String key : perfLogger.getEndTimes().keySet()) {
- perfObj.put(key, perfLogger.getDuration(key));
+ for (Map.Entry<String, Long> entry : durations.entrySet()) {
+ perfObj.put(entry.getKey(), entry.getValue());
}
atsEntity.addOtherInfo(OtherInfoTypes.PERF.name(), perfObj.toString());
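The fix is the standard defensive-copy pattern: snapshot the PerfLogger's mutable map on the hook's thread, then let the async ATS executor read only the copy. The same idea in isolation — liveDurations, executor, and publish are illustrative names, not Hive APIs:

    // Copy a live map before handing it to another thread, so concurrent
    // mutation by the owner cannot throw ConcurrentModificationException
    // mid-iteration on the consumer side.
    Map<String, Long> snapshot = new HashMap<>(liveDurations);
    executor.submit(() -> publish(snapshot)); // the task reads only the snapshot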
[19/50] [abbrv] hive git commit: HIVE-15941: Fix
o.a.h.hive.ql.exec.tez.TezTask compilation issue with tez master (Rajesh
Balamohan, reviewed by Siddharth Seth)
Posted by se...@apache.org.
HIVE-15941: Fix o.a.h.hive.ql.exec.tez.TezTask compilation issue with tez master (Rajesh Balamohan, reviewed by Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/54977d2f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/54977d2f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/54977d2f
Branch: refs/heads/hive-14535
Commit: 54977d2f05fca63d1bcfebf6673f85a3f5c6f8ad
Parents: 0debf9f
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Mon Feb 20 05:53:41 2017 +0530
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Mon Feb 20 05:53:41 2017 +0530
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java | 11 +++++++++++
1 file changed, 11 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/54977d2f/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 69cbe0b..58f0b33 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -627,6 +627,17 @@ public class TezTask extends Task<TezWork> {
dagClient.close(); // Don't sync.
}
+ public String getDagIdentifierString() {
+ // TODO: Implement this when tez is upgraded. TEZ-3550
+ return null;
+ }
+
+ public String getSessionIdentifierString() {
+ // TODO: Implement this when tez is upgraded. TEZ-3550
+ return null;
+ }
+
+
@Override
public String getExecutionContext() {
return dagClient.getExecutionContext(); // Don't sync.
[03/50] [abbrv] hive git commit: HIVE-15917 : incorrect error
handling from BackgroundWork can cause beeline query to hang (Sergey
Shelukhin, reviewed by Siddharth Seth)
Posted by se...@apache.org.
HIVE-15917 : incorrect error handling from BackgroundWork can cause beeline query to hang (Sergey Shelukhin, reviewed by Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e49a0742
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e49a0742
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e49a0742
Branch: refs/heads/hive-14535
Commit: e49a07426884d8494a37046a227ff4a77cf67f57
Parents: 60a36d1
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Feb 16 12:39:55 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Feb 16 12:39:55 2017 -0800
----------------------------------------------------------------------
.../org/apache/hive/service/cli/CLIService.java | 45 +++++++++++---------
.../hive/service/cli/operation/Operation.java | 16 +------
.../service/cli/operation/SQLOperation.java | 4 +-
3 files changed, 30 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e49a0742/service/src/java/org/apache/hive/service/cli/CLIService.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/CLIService.java b/service/src/java/org/apache/hive/service/cli/CLIService.java
index b842f37..714b259 100644
--- a/service/src/java/org/apache/hive/service/cli/CLIService.java
+++ b/service/src/java/org/apache/hive/service/cli/CLIService.java
@@ -404,7 +404,7 @@ public class CLIService extends CompositeService implements ICLIService {
*/
@Override
public OperationHandle getPrimaryKeys(SessionHandle sessionHandle,
- String catalog, String schema, String table)
+ String catalog, String schema, String table)
throws HiveSQLException {
OperationHandle opHandle = sessionManager.getSession(sessionHandle)
.getPrimaryKeys(catalog, schema, table);
@@ -417,9 +417,9 @@ public class CLIService extends CompositeService implements ICLIService {
*/
@Override
public OperationHandle getCrossReference(SessionHandle sessionHandle,
- String primaryCatalog,
- String primarySchema, String primaryTable, String foreignCatalog,
- String foreignSchema, String foreignTable)
+ String primaryCatalog,
+ String primarySchema, String primaryTable, String foreignCatalog,
+ String foreignSchema, String foreignTable)
throws HiveSQLException {
OperationHandle opHandle = sessionManager.getSession(sessionHandle)
.getCrossReference(primaryCatalog, primarySchema, primaryTable,
@@ -460,6 +460,8 @@ public class CLIService extends CompositeService implements ICLIService {
// The background operation thread was cancelled
LOG.trace(opHandle + ": The background operation was cancelled", e);
} catch (ExecutionException e) {
+ // Note: Hive ops do not use the normal Future failure path, so this will not happen
+ // in case of actual failure; the Future will just be done.
// The background operation thread was aborted
LOG.warn(opHandle + ": The background operation was aborted", e);
} catch (InterruptedException e) {
@@ -473,23 +475,28 @@ public class CLIService extends CompositeService implements ICLIService {
return opStatus;
}
+ private static final long PROGRESS_MAX_WAIT_NS = 30 * 1000000000l;
private JobProgressUpdate progressUpdateLog(boolean isProgressLogRequested, Operation operation) {
- if (isProgressLogRequested && canProvideProgressLog()) {
- if (OperationType.EXECUTE_STATEMENT.equals(operation.getType())) {
- SessionState sessionState = operation.getParentSession().getSessionState();
- try {
- while (sessionState.getProgressMonitor() == null && !operation.isFinished()) {
- Thread.sleep(10L); // sleep for 10 ms
- }
- } catch (InterruptedException e) {
- LOG.warn("Error while getting progress update", e);
- }
- if (sessionState.getProgressMonitor() != null) {
- return new JobProgressUpdate(sessionState.getProgressMonitor());
- }
- }
+ if (!isProgressLogRequested || !canProvideProgressLog()
+ || !OperationType.EXECUTE_STATEMENT.equals(operation.getType())) {
+ return new JobProgressUpdate(ProgressMonitor.NULL);
+ }
+
+ SessionState sessionState = operation.getParentSession().getSessionState();
+ long startTime = System.nanoTime();
+ int timeOutMs = 8;
+ try {
+ while (sessionState.getProgressMonitor() == null && !operation.isDone()) {
+ long remainingMs = (PROGRESS_MAX_WAIT_NS - (System.nanoTime() - startTime)) / 1000000l;
+ if (remainingMs <= 0) return new JobProgressUpdate(ProgressMonitor.NULL);
+ Thread.sleep(Math.min(remainingMs, timeOutMs));
+ timeOutMs <<= 1;
+ }
+ } catch (InterruptedException e) {
+ LOG.warn("Error while getting progress update", e);
}
- return new JobProgressUpdate(ProgressMonitor.NULL);
+ ProgressMonitor pm = sessionState.getProgressMonitor();
+ return new JobProgressUpdate(pm != null ? pm : ProgressMonitor.NULL);
}
private boolean canProvideProgressLog() {
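The rewrite replaces a fixed 10 ms poll with a bounded exponential backoff: start at 8 ms, double each round, and give up after 30 seconds. The pattern in isolation — ready() stands in for the real condition, and InterruptedException handling is elided:

    // Poll until ready() or a 30-second deadline, doubling the sleep each time.
    long deadlineNs = System.nanoTime() + 30L * 1_000_000_000L;
    long sleepMs = 8;
    while (!ready()) {
      long remainingMs = (deadlineNs - System.nanoTime()) / 1_000_000L;
      if (remainingMs <= 0) {
        break; // timed out; the caller falls back to a NULL progress monitor
      }
      Thread.sleep(Math.min(remainingMs, sleepMs));
      sleepMs <<= 1;
    }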
http://git-wip-us.apache.org/repos/asf/hive/blob/e49a0742/service/src/java/org/apache/hive/service/cli/operation/Operation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/Operation.java b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
index 2039946..11a820f 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/Operation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/Operation.java
@@ -206,20 +206,8 @@ public abstract class Operation {
this.lastAccessTime = System.currentTimeMillis();
}
- public boolean isRunning() {
- return OperationState.RUNNING.equals(state);
- }
-
- public boolean isFinished() {
- return OperationState.FINISHED.equals(state);
- }
-
- public boolean isCanceled() {
- return OperationState.CANCELED.equals(state);
- }
-
- public boolean isFailed() {
- return OperationState.ERROR.equals(state);
+ public boolean isDone() {
+ return state.isTerminal();
}
protected void createOperationLog() {
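The Operation change above replaces four ad-hoc state predicates with a single isDone() that defers to the state enum, so callers such as progressUpdateLog() no longer enumerate terminal states themselves. A hedged sketch of that enum-driven pattern (the state set and isTerminal() body here are illustrative; the real logic lives in OperationState):

enum OpState {
  INITIALIZED, RUNNING, FINISHED, CANCELED, CLOSED, ERROR;

  /** Terminal states: the operation can make no further progress. */
  boolean isTerminal() {
    return this == FINISHED || this == CANCELED || this == CLOSED || this == ERROR;
  }
}

Centralizing the predicate means a terminal state added later only needs to update the enum, not every call site.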
http://git-wip-us.apache.org/repos/asf/hive/blob/e49a0742/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
index 668b4b7..7dde7bf 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/SQLOperation.java
@@ -346,6 +346,7 @@ public class SQLOperation extends ExecuteStatementOperation {
}
runQuery();
} catch (HiveSQLException e) {
+ // TODO: why do we invent our own error path on top of the one from Future.get?
setOperationException(e);
LOG.error("Error running hive query: ", e);
} finally {
@@ -361,8 +362,7 @@ public class SQLOperation extends ExecuteStatementOperation {
} catch (Exception e) {
setOperationException(new HiveSQLException(e));
LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e);
- }
- finally {
+ } finally {
/**
* We'll cache the ThreadLocal RawStore object for this background thread for an orderly cleanup
* when this thread is garbage collected later.
[23/50] [abbrv] hive git commit: HIVE-15972: Runtime filtering not vectorizing for decimal/timestamp/char/varchar (Jason Dere, reviewed by Matt McCline)
Posted by se...@apache.org.
HIVE-15972: Runtime filtering not vectorizing for decimal/timestamp/char/varchar (Jason Dere, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/016afe0d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/016afe0d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/016afe0d
Branch: refs/heads/hive-14535
Commit: 016afe0d69f3a90290e3a127149430ad6d4c603f
Parents: 08ca7b2
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Feb 20 13:03:06 2017 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Feb 20 13:03:06 2017 -0800
----------------------------------------------------------------------
.../FilterColumnBetweenDynamicValue.txt | 2 +
.../vectorized_dynamic_semijoin_reduction2.q | 17 +-
...vectorized_dynamic_semijoin_reduction2.q.out | 428 ++++++++++++++++++-
.../apache/hadoop/hive/tools/GenVectorCode.java | 8 +-
4 files changed, 446 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
index 1aee9b3..9d5432f 100644
--- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
+++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
@@ -27,7 +27,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
public class <ClassName> extends Filter<TypeName>ColumnBetween {
http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
index 446407d..be8e4af 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
@@ -18,7 +18,10 @@ create table dsrv2_big stored as orc as
cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
cast(L_PARTKEY as double) as partkey_double,
cast(l_shipdate as date) as shipdate_date,
- cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+ cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+ cast(l_shipdate as string) as shipdate_string,
+ cast(l_shipdate as char(10)) as shipdate_char,
+ cast(l_shipdate as varchar(10)) as shipdate_varchar
from lineitem;
create table dsrv2_small stored as orc as select * from dsrv2_big limit 20;
analyze table dsrv2_big compute statistics;
@@ -46,5 +49,17 @@ select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.ship
EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
+-- single key (string)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string);
+
+-- single key (char)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char);
+
+-- single key (varchar)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar);
+
drop table dsrv2_big;
drop table dsrv2_small;
http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index 27d8152..062fef6 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -4,7 +4,10 @@ PREHOOK: query: create table dsrv2_big stored as orc as
cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
cast(L_PARTKEY as double) as partkey_double,
cast(l_shipdate as date) as shipdate_date,
- cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+ cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+ cast(l_shipdate as string) as shipdate_string,
+ cast(l_shipdate as char(10)) as shipdate_char,
+ cast(l_shipdate as varchar(10)) as shipdate_varchar
from lineitem
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@lineitem
@@ -16,7 +19,10 @@ POSTHOOK: query: create table dsrv2_big stored as orc as
cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
cast(L_PARTKEY as double) as partkey_double,
cast(l_shipdate as date) as shipdate_date,
- cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+ cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+ cast(l_shipdate as string) as shipdate_string,
+ cast(l_shipdate as char(10)) as shipdate_char,
+ cast(l_shipdate as varchar(10)) as shipdate_varchar
from lineitem
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@lineitem
@@ -25,8 +31,11 @@ POSTHOOK: Output: default@dsrv2_big
POSTHOOK: Lineage: dsrv2_big.partkey_bigint EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
POSTHOOK: Lineage: dsrv2_big.partkey_decimal EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
POSTHOOK: Lineage: dsrv2_big.partkey_double EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_char EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
POSTHOOK: Lineage: dsrv2_big.shipdate_date EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_string SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
POSTHOOK: Lineage: dsrv2_big.shipdate_ts EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
+POSTHOOK: Lineage: dsrv2_big.shipdate_varchar EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ]
PREHOOK: query: create table dsrv2_small stored as orc as select * from dsrv2_big limit 20
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@dsrv2_big
@@ -40,8 +49,11 @@ POSTHOOK: Output: default@dsrv2_small
POSTHOOK: Lineage: dsrv2_small.partkey_bigint SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_bigint, type:bigint, comment:null), ]
POSTHOOK: Lineage: dsrv2_small.partkey_decimal SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_decimal, type:decimal(10,1), comment:null), ]
POSTHOOK: Lineage: dsrv2_small.partkey_double SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_double, type:double, comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_char SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_char, type:char(10), comment:null), ]
POSTHOOK: Lineage: dsrv2_small.shipdate_date SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_date, type:date, comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_string SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_string, type:string, comment:null), ]
POSTHOOK: Lineage: dsrv2_small.shipdate_ts SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_ts, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: dsrv2_small.shipdate_varchar SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_varchar, type:varchar(10), comment:null), ]
PREHOOK: query: analyze table dsrv2_big compute statistics
PREHOOK: type: QUERY
PREHOOK: Input: default@dsrv2_big
@@ -247,7 +259,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: decimal(10,1))
Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
@@ -655,7 +667,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: timestamp)
Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
@@ -754,6 +766,414 @@ POSTHOOK: Input: default@dsrv2_big
POSTHOOK: Input: default@dsrv2_small
#### A masked pattern was here ####
23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (shipdate_string is not null and shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, DynamicValue(RS_7_b_shipdate_string_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (shipdate_string is not null and shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, DynamicValue(RS_7_b_shipdate_string_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_string (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: shipdate_string is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: shipdate_string is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_string (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (shipdate_char is not null and shipdate_char BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, DynamicValue(RS_7_b_shipdate_char_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (shipdate_char is not null and shipdate_char BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, DynamicValue(RS_7_b_shipdate_char_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_char (type: char(10))
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(10))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(10))
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: shipdate_char is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: shipdate_char is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_char (type: char(10))
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(10))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(10))
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: char(10))
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: char(10))
+ 1 _col0 (type: char(10))
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
+PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: (shipdate_varchar is not null and shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, DynamicValue(RS_7_b_shipdate_varchar_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (shipdate_varchar is not null and shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, DynamicValue(RS_7_b_shipdate_varchar_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_varchar (type: varchar(10))
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: varchar(10))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: varchar(10))
+ Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ filterExpr: shipdate_varchar is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: shipdate_varchar is not null (type: boolean)
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: shipdate_varchar (type: varchar(10))
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: varchar(10))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: varchar(10))
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: varchar(10))
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: varchar(10))
+ 1 _col0 (type: varchar(10))
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dsrv2_big
+PREHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dsrv2_big
+POSTHOOK: Input: default@dsrv2_small
+#### A masked pattern was here ####
+23
PREHOOK: query: drop table dsrv2_big
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@dsrv2_big
http://git-wip-us.apache.org/repos/asf/hive/blob/016afe0d/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
----------------------------------------------------------------------
diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 55cfb7b..926321e 100644
--- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -1418,7 +1418,7 @@ public class GenVectorCode extends Task {
getValueMethod = "";
conversionMethod = "";
} else if (operandType.equals("decimal")) {
- defaultValue = "null";
+ defaultValue = "HiveDecimal.ZERO";
vectorType = "HiveDecimal";
getPrimitiveMethod = "getHiveDecimal";
getValueMethod = "";
@@ -1430,13 +1430,13 @@ public class GenVectorCode extends Task {
getValueMethod = ".getBytes()";
conversionMethod = "";
} else if (operandType.equals("char")) {
- defaultValue = "null";
+ defaultValue = "new HiveChar(\"\", 1)";
vectorType = "byte[]";
getPrimitiveMethod = "getHiveChar";
getValueMethod = ".getStrippedValue().getBytes()"; // Does vectorization use stripped char values?
conversionMethod = "";
} else if (operandType.equals("varchar")) {
- defaultValue = "null";
+ defaultValue = "new HiveVarchar(\"\", 1)";
vectorType = "byte[]";
getPrimitiveMethod = "getHiveVarchar";
getValueMethod = ".getValue().getBytes()";
@@ -1450,7 +1450,7 @@ public class GenVectorCode extends Task {
// Special case - Date requires its own specific BetweenDynamicValue class, but derives from FilterLongColumnBetween
typeName = "Long";
} else if (operandType.equals("timestamp")) {
- defaultValue = "null";
+ defaultValue = "new Timestamp(0)";
vectorType = "Timestamp";
getPrimitiveMethod = "getTimestamp";
getValueMethod = "";
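The common thread in this hunk: for decimal, char, varchar and timestamp operands the generator previously emitted null as the default for the generated filter's min/max fields, which matches the types that previously fell back to non-vectorized execution (compare the Execution mode lines flipping to "vectorized, llap" in the q.out diff above). The patch substitutes typed placeholder instances instead. A hedged sketch of what the generated BETWEEN-DynamicValue filter's fields look like with such placeholders (class and field names are illustrative; the real template is FilterColumnBetweenDynamicValue.txt):

import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.HiveDecimal;

class GeneratedBetweenFilterSketch {
  // Typed placeholders hold the slot until the runtime min/max arrive from
  // the broadcast edge; they are never used as real comparison bounds.
  private HiveDecimal leftValue = HiveDecimal.ZERO;
  private HiveDecimal rightValue = HiveDecimal.ZERO;
  private Timestamp tsPlaceholder = new Timestamp(0);
  // evaluate() later overwrites these from DynamicValue and filters the batch.
}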
[21/50] [abbrv] hive git commit: HIVE-15891 : Detect query rewrite scenario for UPDATE/DELETE/MERGE and fail fast (Wei Zheng, reviewed by Eugene Koifman)
Posted by se...@apache.org.
HIVE-15891 : Detect query rewrite scenario for UPDATE/DELETE/MERGE and fail fast (Wei Zheng, reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/091ac8e0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/091ac8e0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/091ac8e0
Branch: refs/heads/hive-14535
Commit: 091ac8e05940ce327d068cee77817bbe428797e2
Parents: 1e00fb3
Author: Wei Zheng <we...@apache.org>
Authored: Sun Feb 19 18:17:10 2017 -0800
Committer: Wei Zheng <we...@apache.org>
Committed: Sun Feb 19 18:17:10 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/Context.java | 12 +++++++++++
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 3 +++
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 5 +++++
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 1 +
.../clientnegative/masking_acid_delete.q | 10 ++++++++++
.../queries/clientnegative/masking_acid_merge.q | 15 ++++++++++++++
.../clientnegative/masking_acid_update.q | 10 ++++++++++
.../clientnegative/masking_acid_delete.q.out | 13 ++++++++++++
.../clientnegative/masking_acid_merge.q.out | 21 ++++++++++++++++++++
.../clientnegative/masking_acid_update.q.out | 13 ++++++++++++
10 files changed, 103 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index fae2a12..d1d2789 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -125,6 +125,10 @@ public class Context {
private Heartbeater heartbeater;
private boolean skipTableMasking;
+
+ // Identify whether the query involves an UPDATE, DELETE or MERGE
+ private boolean isUpdateDeleteMerge;
+
/**
* This determines the prefix of the
* {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx#dest}
@@ -955,4 +959,12 @@ public class Context {
opContext = new CompilationOpContext();
sequencer = new AtomicInteger();
}
+
+ public boolean getIsUpdateDeleteMerge() {
+ return isUpdateDeleteMerge;
+ }
+
+ public void setIsUpdateDeleteMerge(boolean isUpdate) {
+ this.isUpdateDeleteMerge = isUpdate;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 6013218..2ffc130 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -398,6 +398,9 @@ public enum ErrorMsg {
"time."),
DISTINCT_NOT_SUPPORTED(10285, "Distinct keyword is not supported in current context"),
NONACID_COMPACTION_NOT_SUPPORTED(10286, "Compaction is not allowed on non-ACID table {0}.{1}", true),
+ MASKING_FILTERING_ON_ACID_NOT_SUPPORTED(10287,
+ "Detected {0}.{1} has row masking/column filtering enabled, " +
+ "which is not supported for query involving ACID operations", true),
UPDATEDELETE_PARSE_ERROR(10290, "Encountered parse error while parsing rewritten merge/update or " +
"delete query"),
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f053093..9c37af8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10820,6 +10820,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
.applyRowFilterAndColumnMasking(basicPrivObjs);
if (needRewritePrivObjs != null && !needRewritePrivObjs.isEmpty()) {
for (HivePrivilegeObject privObj : needRewritePrivObjs) {
+ // We don't support masking/filtering against ACID query at the moment
+ if (ctx.getIsUpdateDeleteMerge()) {
+ throw new SemanticException(ErrorMsg.MASKING_FILTERING_ON_ACID_NOT_SUPPORTED,
+ privObj.getDbname(), privObj.getObjectName());
+ }
MaskAndFilterInfo info = basicInfos.get(privObj);
String replacementText = tableMask.create(privObj, info);
if (replacementText != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 725f2ce..865c03a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -293,6 +293,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
rewrittenCtx = new Context(conf);
rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
+ rewrittenCtx.setIsUpdateDeleteMerge(true);
} catch (IOException e) {
throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
}
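Taken together, the hunks above form a small fail-fast handshake: UpdateDeleteSemanticAnalyzer marks the rewritten Context, and SemanticAnalyzer checks that mark before applying row masking or column filtering, refusing early instead of producing an incorrect ACID rewrite. A self-contained sketch of the flag-then-check shape, using toy stand-ins for the Hive classes:

class QueryContext {
  private boolean updateDeleteMerge;
  void setUpdateDeleteMerge(boolean v) { updateDeleteMerge = v; }
  boolean isUpdateDeleteMerge() { return updateDeleteMerge; }
}

class MaskingAnalyzer {
  /** Fails fast if a masked/filtered table is referenced from an ACID rewrite. */
  void applyMasking(QueryContext ctx, String dbName, String tableName) {
    if (ctx.isUpdateDeleteMerge()) {
      throw new IllegalStateException("Masking/filtering on " + dbName + "." + tableName
          + " is not supported for queries involving ACID operations");
    }
    // ...otherwise rewrite the referenced table with the masked expression...
  }
}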
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/test/queries/clientnegative/masking_acid_delete.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/masking_acid_delete.q b/ql/src/test/queries/clientnegative/masking_acid_delete.q
new file mode 100644
index 0000000..2785d7a
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/masking_acid_delete.q
@@ -0,0 +1,10 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true");
+
+delete from masking_test where value='ddd';
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/test/queries/clientnegative/masking_acid_merge.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/masking_acid_merge.q b/ql/src/test/queries/clientnegative/masking_acid_merge.q
new file mode 100644
index 0000000..50471e0
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/masking_acid_merge.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table nonacid (key int, value string) stored as orc;
+
+create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true");
+
+MERGE INTO masking_test as t using nonacid as s ON t.key = s.key
+WHEN MATCHED AND s.key < 5 THEN DELETE
+WHEN MATCHED AND s.key < 3 THEN UPDATE set key = 1
+WHEN NOT MATCHED THEN INSERT VALUES (s.key, s.value);
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/test/queries/clientnegative/masking_acid_update.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/masking_acid_update.q b/ql/src/test/queries/clientnegative/masking_acid_update.q
new file mode 100644
index 0000000..586d20b
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/masking_acid_update.q
@@ -0,0 +1,10 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true");
+
+update masking_test set key=1 where value='ddd';
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/test/results/clientnegative/masking_acid_delete.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/masking_acid_delete.q.out b/ql/src/test/results/clientnegative/masking_acid_delete.q.out
new file mode 100644
index 0000000..021f2cd
--- /dev/null
+++ b/ql/src/test/results/clientnegative/masking_acid_delete.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+FAILED: SemanticException [Error 10287]: Detected default.masking_test has row masking/column filtering enabled, which is not supported for query involving ACID operations
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/test/results/clientnegative/masking_acid_merge.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/masking_acid_merge.q.out b/ql/src/test/results/clientnegative/masking_acid_merge.q.out
new file mode 100644
index 0000000..917c44a
--- /dev/null
+++ b/ql/src/test/results/clientnegative/masking_acid_merge.q.out
@@ -0,0 +1,21 @@
+PREHOOK: query: create table nonacid (key int, value string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nonacid
+POSTHOOK: query: create table nonacid (key int, value string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nonacid
+PREHOOK: query: create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+FAILED: SemanticException [Error 10287]: Detected default.masking_test has row masking/column filtering enabled, which is not supported for query involving ACID operations
http://git-wip-us.apache.org/repos/asf/hive/blob/091ac8e0/ql/src/test/results/clientnegative/masking_acid_update.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/masking_acid_update.q.out b/ql/src/test/results/clientnegative/masking_acid_update.q.out
new file mode 100644
index 0000000..021f2cd
--- /dev/null
+++ b/ql/src/test/results/clientnegative/masking_acid_update.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test (key int, value string)
+clustered by (value) into 2 buckets stored as orc
+tblproperties ("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+FAILED: SemanticException [Error 10287]: Detected default.masking_test has row masking/column filtering enabled, which is not supported for query involving ACID operations
[33/50] [abbrv] hive git commit: HIVE-15570. Improved messaging when llap is down, ignore consistent splits config in non llap mode. (Zhiyuan Yang, reviewed by Sergey Shelukhin, Siddharth Seth)
Posted by se...@apache.org.
HIVE-15570. Improved messaging when llap is down, ignore consistent splits config in non llap mode. (Zhiyuan Yang, reviewed by Sergey Shelukhin, Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dc0938c4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dc0938c4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dc0938c4
Branch: refs/heads/hive-14535
Commit: dc0938c42f6c9a42adb3fcbb391fb759a3bb0072
Parents: 78e4bb7
Author: Siddharth Seth <ss...@apache.org>
Authored: Tue Feb 21 22:52:54 2017 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Tue Feb 21 22:52:54 2017 -0800
----------------------------------------------------------------------
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 9 +++++----
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java | 6 +++++-
2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dc0938c4/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4faaa8b..7c88f4f 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3162,10 +3162,11 @@ public class HiveConf extends Configuration {
"llap.daemon.service.port"),
LLAP_DAEMON_WEB_SSL("hive.llap.daemon.web.ssl", false,
"Whether LLAP daemon web UI should use SSL.", "llap.daemon.service.ssl"),
- LLAP_CLIENT_CONSISTENT_SPLITS("hive.llap.client.consistent.splits",
- false,
- "Whether to setup split locations to match nodes on which llap daemons are running," +
- " instead of using the locations provided by the split itself"),
+ LLAP_CLIENT_CONSISTENT_SPLITS("hive.llap.client.consistent.splits", false,
+ "Whether to setup split locations to match nodes on which llap daemons are running, " +
+ "instead of using the locations provided by the split itself. If there is no llap daemon " +
+ "running, fall back to locations provided by the split. This is effective only if " +
+ "hive.execution.mode is llap"),
LLAP_VALIDATE_ACLS("hive.llap.validate.acls", true,
"Whether LLAP should reject permissive ACLs in some cases (e.g. its own management\n" +
"protocol or ZK paths), similar to how ssh refuses a key with bad access permissions."),
http://git-wip-us.apache.org/repos/asf/hive/blob/dc0938c4/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
index d691e18..2b57d90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import com.google.common.base.Preconditions;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -34,7 +35,8 @@ public class Utils {
public static SplitLocationProvider getSplitLocationProvider(Configuration conf, Logger LOG) throws
IOException {
boolean useCustomLocations =
- HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS);
+ HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap")
+ && HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS);
SplitLocationProvider splitLocationProvider;
LOG.info("SplitGenerator using llap affinitized locations: " + useCustomLocations);
if (useCustomLocations) {
@@ -43,6 +45,8 @@ public class Utils {
Collection<ServiceInstance> serviceInstances =
serviceRegistry.getInstances().getAllInstancesOrdered(true);
+ Preconditions.checkArgument(!serviceInstances.isEmpty(),
+ "No running LLAP daemons! Please check LLAP service status and zookeeper configuration");
ArrayList<String> locations = new ArrayList<>(serviceInstances.size());
for (ServiceInstance serviceInstance : serviceInstances) {
if (LOG.isDebugEnabled()) {
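The Utils hunk above tightens two things: daemon-affinitized split locations are honored only when hive.execution.mode is actually llap, and an empty daemon list now fails fast with an actionable message instead of yielding splits with no usable locations. A reduced sketch of that gating, assuming toy inputs in place of HiveConf and the service registry:

import java.util.List;

class SplitLocationGateSketch {
  /** Mirrors the new predicate: both the execution mode and the flag must agree. */
  static boolean useCustomLocations(String executionMode, boolean consistentSplits) {
    return "llap".equals(executionMode) && consistentSplits;
  }

  /** Mirrors the new precondition: no daemons means a clear, early error. */
  static List<String> daemonLocations(List<String> runningDaemons) {
    if (runningDaemons.isEmpty()) {
      throw new IllegalArgumentException(
          "No running LLAP daemons! Please check LLAP service status and zookeeper configuration");
    }
    return runningDaemons;
  }
}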
[34/50] [abbrv] hive git commit: HIVE-15990: Always initialize connection properties in DruidSerDe (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-15990: Always initialize connection properties in DruidSerDe (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8973d2c6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8973d2c6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8973d2c6
Branch: refs/heads/hive-14535
Commit: 8973d2c66394ed25b1baa20df3920870ae9b053c
Parents: dc0938c
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Feb 20 17:32:46 2017 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Feb 22 10:16:50 2017 +0000
----------------------------------------------------------------------
.../hadoop/hive/druid/serde/DruidSerDe.java | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8973d2c6/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
index 4235e89..bbe29b6 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/serde/DruidSerDe.java
@@ -100,18 +100,21 @@ public class DruidSerDe extends AbstractSerDe {
protected static final Logger LOG = LoggerFactory.getLogger(DruidSerDe.class);
- private String[] columns;
-
- private PrimitiveTypeInfo[] types;
-
private int numConnection;
-
private Period readTimeout;
+ private String[] columns;
+ private PrimitiveTypeInfo[] types;
private ObjectInspector inspector;
@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
+ // Init connection properties
+ numConnection = HiveConf
+ .getIntVar(configuration, HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
+ readTimeout = new Period(
+ HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
+
final List<String> columnNames = new ArrayList<>();
final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
List<ObjectInspector> inspectors = new ArrayList<>();
@@ -173,11 +176,6 @@ public class DruidSerDe extends AbstractSerDe {
throw new SerDeException("Druid broker address not specified in configuration");
}
- numConnection = HiveConf
- .getIntVar(configuration, HiveConf.ConfVars.HIVE_DRUID_NUM_HTTP_CONNECTION);
- readTimeout = new Period(
- HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_HTTP_READ_TIMEOUT));
-
// Infer schema
SegmentAnalysis schemaInfo;
try {
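The DruidSerDe hunk appears to be purely an initialization-order fix: numConnection and readTimeout were previously assigned only after the schema-related branches, so a code path that returned or threw before that point, or that needed HTTP access to infer the schema, could observe them unset. Moving the assignments to the top of initialize() makes them unconditional. A reduced illustration of the bug class, with hypothetical names and values:

class InitOrderSketch {
  private int numConnection;
  private long readTimeoutMs;

  void initialize(boolean schemaProvided) {
    // Assign unconditionally, before any branch can return or throw.
    numConnection = 20;
    readTimeoutMs = 60_000L;

    if (schemaProvided) {
      return; // before the fix, an early path like this left the fields unset
    }
    inferSchemaOverHttp(); // consumes numConnection / readTimeoutMs
  }

  private void inferSchemaOverHttp() { /* issue the metadata query */ }
}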
[32/50] [abbrv] hive git commit: HIVE-15938 : position alias in order by fails for union queries (Sergey Shelukhin, reviewed by Ashutosh Chauhan, Pengcheng Xiong)
Posted by se...@apache.org.
HIVE-15938 : position alias in order by fails for union queries (Sergey Shelukhin, reviewed by Ashutosh Chauhan, Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78e4bb79
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78e4bb79
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78e4bb79
Branch: refs/heads/hive-14535
Commit: 78e4bb79a2f9e74acb8144db1854e5b9ad369f0f
Parents: ffe7357
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Feb 21 17:15:14 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Feb 21 17:41:44 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/Driver.java | 6 +-
.../metadata/HiveMaterializedViewsRegistry.java | 3 +-
.../calcite/translator/ASTBuilder.java | 38 +--
.../index/RewriteParseContextGenerator.java | 4 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 51 ++-
.../ql/parse/ColumnStatsAutoGatherContext.java | 4 +-
.../ql/parse/ColumnStatsSemanticAnalyzer.java | 4 +-
.../apache/hadoop/hive/ql/parse/HiveParser.g | 11 +-
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 190 +++++++++++-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 7 +-
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 5 +-
.../hadoop/hive/ql/tools/LineageInfo.java | 5 +-
.../ql/parse/TestMacroSemanticAnalyzer.java | 4 +-
.../parse/TestUpdateDeleteSemanticAnalyzer.java | 4 +-
.../authorization/AuthorizationTestUtil.java | 2 +-
.../queries/clientpositive/union_pos_alias.q | 30 ++
.../clientpositive/constant_prop_1.q.out | 4 +-
.../clientpositive/union_pos_alias.q.out | 308 +++++++++++++++++++
18 files changed, 611 insertions(+), 69 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 2423471..592b1f1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -118,7 +118,6 @@ import org.apache.hadoop.hive.ql.session.OperationLog.LoggingLevel;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
@@ -466,9 +465,7 @@ public class Driver implements CommandProcessor {
ctx.setHDFSCleanup(true);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(command, ctx);
- tree = ParseUtils.findRootNonNullToken(tree);
+ ASTNode tree = ParseUtils.parse(command, ctx);
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
// Trigger query hook before compilation
@@ -646,6 +643,7 @@ public class Driver implements CommandProcessor {
}
}
+
private int handleInterruption(String msg) {
SQLState = "HY008"; //SQLState for cancel operation
errorMessage = "FAILED: command has been interrupted: " + msg;
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 89c87cd..1d78b4c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -328,8 +328,7 @@ public final class HiveMaterializedViewsRegistry {
private static RelNode parseQuery(String viewQuery) {
try {
- final ParseDriver pd = new ParseDriver();
- final ASTNode node = ParseUtils.findRootNonNullToken(pd.parse(viewQuery));
+ final ASTNode node = ParseUtils.parse(viewQuery);
final QueryState qs = new QueryState(SessionState.get().getConf());
CalcitePlanner analyzer = new CalcitePlanner(qs);
analyzer.initCtx(new Context(SessionState.get().getConf()));
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index e36e1bd..0dc0c24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -38,19 +38,19 @@ import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-class ASTBuilder {
+public class ASTBuilder {
- static ASTBuilder construct(int tokenType, String text) {
+ public static ASTBuilder construct(int tokenType, String text) {
ASTBuilder b = new ASTBuilder();
b.curr = createAST(tokenType, text);
return b;
}
- static ASTNode createAST(int tokenType, String text) {
+ public static ASTNode createAST(int tokenType, String text) {
return (ASTNode) ParseDriver.adaptor.create(tokenType, text);
}
- static ASTNode destNode() {
+ public static ASTNode destNode() {
return ASTBuilder
.construct(HiveParser.TOK_DESTINATION, "TOK_DESTINATION")
.add(
@@ -58,7 +58,7 @@ class ASTBuilder {
"TOK_TMP_FILE")).node();
}
- static ASTNode table(RelNode scan) {
+ public static ASTNode table(RelNode scan) {
HiveTableScan hts;
if (scan instanceof DruidQuery) {
hts = (HiveTableScan) ((DruidQuery)scan).getTableScan();
@@ -102,7 +102,7 @@ class ASTBuilder {
return b.node();
}
- static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond,
+ public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond,
boolean semiJoin) {
ASTBuilder b = null;
@@ -129,12 +129,12 @@ class ASTBuilder {
return b.node();
}
- static ASTNode subQuery(ASTNode qry, String alias) {
+ public static ASTNode subQuery(ASTNode qry, String alias) {
return ASTBuilder.construct(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY").add(qry)
.add(HiveParser.Identifier, alias).node();
}
- static ASTNode qualifiedName(String tableName, String colName) {
+ public static ASTNode qualifiedName(String tableName, String colName) {
ASTBuilder b = ASTBuilder
.construct(HiveParser.DOT, ".")
.add(
@@ -143,36 +143,36 @@ class ASTBuilder {
return b.node();
}
- static ASTNode unqualifiedName(String colName) {
+ public static ASTNode unqualifiedName(String colName) {
ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL").add(
HiveParser.Identifier, colName);
return b.node();
}
- static ASTNode where(ASTNode cond) {
+ public static ASTNode where(ASTNode cond) {
return ASTBuilder.construct(HiveParser.TOK_WHERE, "TOK_WHERE").add(cond).node();
}
- static ASTNode having(ASTNode cond) {
+ public static ASTNode having(ASTNode cond) {
return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
}
- static ASTNode limit(Object offset, Object limit) {
+ public static ASTNode limit(Object offset, Object limit) {
return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
.add(HiveParser.Number, offset.toString())
.add(HiveParser.Number, limit.toString()).node();
}
- static ASTNode selectExpr(ASTNode expr, String alias) {
+ public static ASTNode selectExpr(ASTNode expr, String alias) {
return ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR").add(expr)
.add(HiveParser.Identifier, alias).node();
}
- static ASTNode literal(RexLiteral literal) {
+ public static ASTNode literal(RexLiteral literal) {
return literal(literal, false);
}
- static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) {
+ public static ASTNode literal(RexLiteral literal, boolean useTypeQualInLiteral) {
Object val = null;
int type = 0;
SqlTypeName sqlType = literal.getType().getSqlTypeName();
@@ -328,21 +328,21 @@ class ASTBuilder {
ASTNode curr;
- ASTNode node() {
+ public ASTNode node() {
return curr;
}
- ASTBuilder add(int tokenType, String text) {
+ public ASTBuilder add(int tokenType, String text) {
ParseDriver.adaptor.addChild(curr, createAST(tokenType, text));
return this;
}
- ASTBuilder add(ASTBuilder b) {
+ public ASTBuilder add(ASTBuilder b) {
ParseDriver.adaptor.addChild(curr, b.curr);
return this;
}
- ASTBuilder add(ASTNode n) {
+ public ASTBuilder add(ASTNode n) {
if (n != null) {
ParseDriver.adaptor.addChild(curr, n);
}
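Since ASTBuilder is now public, code outside the calcite translator package
(notably ParseUtils in this commit) can synthesize AST fragments fluently. A
small sketch using only methods visible in this diff; the column name "key" is
just an example:
    // Builds TOK_SELEXPR -> TOK_TABLE_OR_COL -> Identifier("key").
    ASTNode selExpr = ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR")
        .add(ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL")
            .add(HiveParser.Identifier, "key"))
        .node();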
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
index 340d29a..5659a72 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java
@@ -62,9 +62,7 @@ public final class RewriteParseContextGenerator {
Operator<? extends OperatorDesc> operatorTree;
try {
Context ctx = new Context(queryState.getConf());
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(command, ctx);
- tree = ParseUtils.findRootNonNullToken(tree);
+ ASTNode tree = ParseUtils.parse(command, ctx);
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
assert(sem instanceof SemanticAnalyzer);
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 10f16ca..21bf020 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -973,17 +973,18 @@ public class CalcitePlanner extends SemanticAnalyzer {
return newAst;
}
- /**
- * Performs breadth-first search of the AST for a nested set of tokens. Tokens
- * don't have to be each others' direct children, they can be separated by
- * layers of other tokens. For each token in the list, the first one found is
- * matched and there's no backtracking; thus, if AST has multiple instances of
- * some token, of which only one matches, it is not guaranteed to be found. We
- * use this for simple things. Not thread-safe - reuses searchQueue.
- */
- static class ASTSearcher {
+
+ public static class ASTSearcher {
private final LinkedList<ASTNode> searchQueue = new LinkedList<ASTNode>();
+ /**
+ * Performs breadth-first search of the AST for a nested set of tokens. Tokens
+ * don't have to be each others' direct children, they can be separated by
+ * layers of other tokens. For each token in the list, the first one found is
+ * matched and there's no backtracking; thus, if AST has multiple instances of
+ * some token, of which only one matches, it is not guaranteed to be found. We
+ * use this for simple things. Not thread-safe - reuses searchQueue.
+ */
public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) {
searchQueue.clear();
searchQueue.add(ast);
@@ -1007,6 +1008,38 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
return null;
}
+
+ // Note: despite the name, the FIFO searchQueue makes this traversal
+ // effectively breadth-first; it returns the first node of the given type.
+ public ASTNode depthFirstSearch(ASTNode ast, int token) {
+ searchQueue.clear();
+ searchQueue.add(ast);
+ while (!searchQueue.isEmpty()) {
+ ASTNode next = searchQueue.poll();
+ if (next.getType() == token) return next;
+ for (int j = 0; j < next.getChildCount(); ++j) {
+ searchQueue.add((ASTNode) next.getChild(j));
+ }
+ }
+ return null;
+ }
+
+ public ASTNode simpleBreadthFirstSearchAny(ASTNode ast, int... tokens) {
+ searchQueue.clear();
+ searchQueue.add(ast);
+ while (!searchQueue.isEmpty()) {
+ ASTNode next = searchQueue.poll();
+ for (int i = 0; i < tokens.length; ++i) {
+ if (next.getType() == tokens[i]) return next;
+ }
+ for (int i = 0; i < next.getChildCount(); ++i) {
+ searchQueue.add((ASTNode) next.getChild(i));
+ }
+ }
+ return null;
+ }
+
+ public void reset() {
+ searchQueue.clear();
+ }
}
private static void replaceASTChild(ASTNode child, ASTNode newChild) {
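A usage sketch for the now-public ASTSearcher (tree and token constants as in
this diff; each search method clears the shared queue on entry, and reset()
just releases the queued references between uses):
    CalcitePlanner.ASTSearcher searcher = new CalcitePlanner.ASTSearcher();
    // First node matching either token, breadth-first:
    ASTNode select = searcher.simpleBreadthFirstSearchAny(
        tree, HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
    // Single-token search (also queue-based, hence effectively breadth-first):
    ASTNode setCol = searcher.depthFirstSearch(tree, HiveParser.TOK_SETCOLREF);
    searcher.reset();  // not thread-safe; don't share across threads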
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java
index 80e62c1..3b719af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java
@@ -132,9 +132,7 @@ public class ColumnStatsAutoGatherContext {
//0. initialization
Context ctx = new Context(conf);
ctx.setExplainConfig(origCtx.getExplainConfig());
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(analyzeCommand, ctx);
- tree = ParseUtils.findRootNonNullToken(tree);
+ ASTNode tree = ParseUtils.parse(analyzeCommand, ctx);
//1. get the ColumnStatsSemanticAnalyzer
BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(new QueryState(conf), tree);
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index ff07b42..93b8183 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -294,14 +294,12 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_IO_ERROR.getMsg());
}
ctx.setCmd(rewrittenQuery);
- ParseDriver pd = new ParseDriver();
try {
- rewrittenTree = pd.parse(rewrittenQuery, ctx);
+ rewrittenTree = ParseUtils.parse(rewrittenQuery, ctx);
} catch (ParseException e) {
throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_PARSE_ERROR.getMsg());
}
- rewrittenTree = ParseUtils.findRootNonNullToken(rewrittenTree);
return rewrittenTree;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index eb81393..b4b5bfb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -42,6 +42,7 @@ TOK_SUBQUERY;
TOK_INSERT_INTO;
TOK_DESTINATION;
TOK_ALLCOLREF;
+TOK_SETCOLREF;
TOK_TABLE_OR_COL;
TOK_FUNCTION;
TOK_FUNCTIONDI;
@@ -2442,7 +2443,7 @@ fromStatement
)
^(TOK_INSERT
^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))
+ ^(TOK_SELECT ^(TOK_SELEXPR TOK_SETCOLREF))
)
)
-> {$fromStatement.tree}
@@ -2526,7 +2527,7 @@ selectStatement
)
^(TOK_INSERT
^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))
+ ^(TOK_SELECT ^(TOK_SELEXPR TOK_SETCOLREF))
$o? $c? $d? $sort? $l?
)
)
@@ -2545,7 +2546,7 @@ setOpSelectStatement[CommonTree t]
)
^(TOK_INSERT
^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
+ ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_SETCOLREF))
)
)
-> {$setOpSelectStatement.tree != null && ((CommonTree)u.getTree()).getType()!=HiveParser.TOK_UNIONDISTINCT}?
@@ -2560,7 +2561,7 @@ setOpSelectStatement[CommonTree t]
)
^(TOK_INSERT
^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
+ ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_SETCOLREF))
)
)
-> ^($u {$t} $b)
@@ -2579,7 +2580,7 @@ setOpSelectStatement[CommonTree t]
)
^(TOK_INSERT
^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))
+ ^(TOK_SELECT ^(TOK_SELEXPR TOK_SETCOLREF))
)
)
-> {$setOpSelectStatement.tree}
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 943e6af..473a664 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -18,6 +18,15 @@
package org.apache.hadoop.hive.ql.parse;
+import org.apache.hadoop.hive.ql.Context;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.antlr.runtime.tree.CommonTree;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
+
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
@@ -48,6 +57,26 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
*
*/
public final class ParseUtils {
+ private static final Logger LOG = LoggerFactory.getLogger(ParseUtils.class);
+
+ /** Parses the Hive query. */
+ public static ASTNode parse(String command) throws ParseException {
+ return parse(command, null);
+ }
+
+ /** Parses the Hive query. */
+ public static ASTNode parse(String command, Context ctx) throws ParseException {
+ return parse(command, ctx, true);
+ }
+
+ /** Parses the Hive query. */
+ public static ASTNode parse(
+ String command, Context ctx, boolean setTokenRewriteStream) throws ParseException {
+ ParseDriver pd = new ParseDriver();
+ ASTNode tree = pd.parse(command, ctx, setTokenRewriteStream);
+ tree = findRootNonNullToken(tree);
+ handleSetColRefs(tree);
+ return tree;
+ }
/**
* Tests whether the parse tree node is a join token.
@@ -77,7 +106,7 @@ public final class ParseUtils {
*
* @return node at which descent stopped
*/
- public static ASTNode findRootNonNullToken(ASTNode tree) {
+ private static ASTNode findRootNonNullToken(ASTNode tree) {
while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
tree = (ASTNode) tree.getChild(0);
}
@@ -311,4 +340,163 @@ public final class ParseUtils {
return stack.empty() && otherStack.empty();
}
+
+
+ private static void handleSetColRefs(ASTNode tree) {
+ CalcitePlanner.ASTSearcher astSearcher = new CalcitePlanner.ASTSearcher();
+ while (true) {
+ astSearcher.reset();
+ ASTNode setCols = astSearcher.depthFirstSearch(tree, HiveParser.TOK_SETCOLREF);
+ if (setCols == null) break;
+ processSetColsNode(setCols, astSearcher);
+ }
+ }
+
+ /**
+ * Replaces a spurious TOK_SETCOLREF added by the parser with column names referring to the
+ * query, e.g. in a union. This maintains the expectation, held by code such as order-by-position
+ * aliasing, of not encountering ALLCOLREF. If the columns cannot be found with confidence,
+ * SETCOLREF is simply replaced with ALLCOLREF. Most of the cases where that happens are
+ * easy to work around in the query (e.g. by adding column aliases in the union).
+ * @param setCols TOK_SETCOLREF ASTNode.
+ * @param searcher AST searcher to reuse.
+ */
+ private static void processSetColsNode(ASTNode setCols, ASTSearcher searcher) {
+ searcher.reset();
+ CommonTree rootNode = setCols;
+ while (rootNode != null && rootNode.getType() != HiveParser.TOK_INSERT) {
+ rootNode = rootNode.parent;
+ }
+ if (rootNode == null || rootNode.parent == null) {
+ // Couldn't find the parent insert; replace with ALLCOLREF.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the root INSERT");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ rootNode = rootNode.parent; // TOK_QUERY above insert
+ Tree fromNode = null;
+ for (int j = 0; j < rootNode.getChildCount(); ++j) {
+ Tree child = rootNode.getChild(j);
+ if (child.getType() == HiveParser.TOK_FROM) {
+ fromNode = child;
+ break;
+ }
+ }
+ if (!(fromNode instanceof ASTNode)) {
+ // Couldn't find the from that contains subquery; replace with ALLCOLREF.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the FROM");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ // If there's a union alias, we qualify the columns with it to make the rewrite more
+ // explicit; that way, anything unexpected is more likely to fail outright.
+ String alias = null;
+ if (fromNode.getChildCount() > 0) {
+ Tree fromWhat = fromNode.getChild(0);
+ if (fromWhat.getType() == HiveParser.TOK_SUBQUERY && fromWhat.getChildCount() > 1) {
+ Tree child = fromWhat.getChild(fromWhat.getChildCount() - 1);
+ if (child.getType() == HiveParser.Identifier) {
+ alias = child.getText();
+ }
+ }
+ }
+ // We find the SELECT closest to the top. This assumes there's only one FROM or FROM-s
+ // are all equivalent (union case). Also, this assumption could be false for an already
+ // malformed query; we don't check for that here - it will fail later anyway.
+ // TODO: Maybe we should find ALL the SELECT-s not nested in another from, and compare.
+ ASTNode select = searcher.simpleBreadthFirstSearchAny((ASTNode)fromNode,
+ HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
+ if (select == null) {
+ // Couldn't find the SELECT inside the FROM subquery; replace with ALLCOLREF.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the SELECT");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ // Found the proper columns.
+ List<ASTNode> newChildren = new ArrayList<>(select.getChildCount());
+ HashSet<String> aliases = new HashSet<>();
+ for (int i = 0; i < select.getChildCount(); ++i) {
+ Tree selExpr = select.getChild(i);
+ assert selExpr.getType() == HiveParser.TOK_SELEXPR;
+ assert selExpr.getChildCount() > 0;
+ // Examine the last child. It could be an alias.
+ Tree child = selExpr.getChild(selExpr.getChildCount() - 1);
+ switch (child.getType()) {
+ case HiveParser.TOK_SETCOLREF:
+ // We have a nested setcolref. Process that and start from scratch. TODO: use a stack?
+ processSetColsNode((ASTNode)child, searcher);
+ processSetColsNode(setCols, searcher);
+ return;
+ case HiveParser.TOK_ALLCOLREF:
+ // We should find an alias of this insert and do (alias).*. However, this won't fix e.g.
+ // the positional order-by-alias case, because we'd still have a star at the top level. Bail.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ case HiveParser.TOK_TABLE_OR_COL:
+ Tree idChild = child.getChild(0);
+ assert idChild.getType() == HiveParser.Identifier : idChild;
+ if (!createChildColumnRef(idChild, alias, newChildren, aliases)) {
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ break;
+ case HiveParser.Identifier:
+ if (!createChildColumnRef(child, alias, newChildren, aliases)) {
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ break;
+ case HiveParser.DOT: {
+ Tree colChild = child.getChild(child.getChildCount() - 1);
+ assert colChild.getType() == HiveParser.Identifier : colChild;
+ if (!createChildColumnRef(colChild, alias, newChildren, aliases)) {
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ break;
+ }
+ default:
+ // Not really sure how to refer to this (or if we can).
+ // TODO: We could find a different from branch for the union, that might have an alias?
+ // Or we could add an alias here to refer to, but that might break other branches.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node "
+ + child.getType() + " " + child.getText());
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ }
+ // The INSERT search at the beginning would have failed if these parents didn't exist.
+ ASTNode parent = (ASTNode)setCols.parent.parent;
+ int t = parent.getType();
+ assert t == HiveParser.TOK_SELECT || t == HiveParser.TOK_SELECTDI : t;
+ int ix = setCols.parent.childIndex;
+ parent.deleteChild(ix);
+ for (ASTNode node : newChildren) {
+ parent.insertChild(ix++, node);
+ }
+ }
+
+ private static boolean createChildColumnRef(Tree child, String alias,
+ List<ASTNode> newChildren, HashSet<String> aliases) {
+ String colAlias = child.getText();
+ if (!aliases.add(colAlias)) {
+ // TODO: if a side of the union has 2 columns with the same name, no one at a higher
+ // level can refer to them. We could change the alias in the original node.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because of duplicate alias " + colAlias);
+ return false;
+ }
+ ASTBuilder selExpr = ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
+ ASTBuilder toc = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+ ASTBuilder id = ASTBuilder.construct(HiveParser.Identifier, colAlias);
+ if (alias == null) {
+ selExpr = selExpr.add(toc.add(id));
+ } else {
+ ASTBuilder dot = ASTBuilder.construct(HiveParser.DOT, ".");
+ ASTBuilder aliasNode = ASTBuilder.construct(HiveParser.Identifier, alias);
+ selExpr = selExpr.add(dot.add(toc.add(aliasNode)).add(id));
+ }
+ newChildren.add(selExpr.node());
+ return true;
+ }
}
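A worked illustration of the SETCOLREF rewrite under the grammar change above
(the query string is an assumed example):
    // For: select key, value from src union all
    //      select key, value from src order by 1
    // the grammar wraps the union in a subquery whose outer select list is the
    // placeholder ^(TOK_SELEXPR TOK_SETCOLREF). handleSetColRefs() then expands
    // the placeholder into explicit references, roughly
    //   TOK_SELEXPR(TOK_TABLE_OR_COL(Identifier key))
    //   TOK_SELEXPR(TOK_TABLE_OR_COL(Identifier value))
    // so "order by 1" can resolve positionally; when the columns cannot be
    // determined with confidence, the token falls back to TOK_ALLCOLREF.
    ASTNode tree = ParseUtils.parse(
        "select key, value from src union all select key, value from src order by 1");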
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9eafb0b..2430811 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2429,7 +2429,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String tab_name, String alias) throws SemanticException {
- ParseDriver pd = new ParseDriver();
ASTNode viewTree;
final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
@@ -2438,8 +2437,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
String viewText = tab.getViewExpandedText();
// Reparse text, passing null for context to avoid clobbering
// the top-level token stream.
- ASTNode tree = pd.parse(viewText, ctx, false);
- tree = ParseUtils.findRootNonNullToken(tree);
+ ASTNode tree = ParseUtils.parse(viewText, ctx, false);
viewTree = tree;
Dispatcher nodeOriginDispatcher = new Dispatcher() {
@Override
@@ -10885,11 +10883,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// check if we need to ctx.setCmd(rewrittenQuery);
ParseDriver pd = new ParseDriver();
try {
- rewrittenTree = pd.parse(rewrittenQuery);
+ rewrittenTree = ParseUtils.parse(rewrittenQuery);
} catch (ParseException e) {
throw new SemanticException(e);
}
- rewrittenTree = ParseUtils.findRootNonNullToken(rewrittenTree);
return rewrittenTree;
} else {
return ast;
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 865c03a..64f1bdd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -299,13 +299,10 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
}
rewrittenCtx.setCmd(rewrittenQueryStr.toString());
- ParseDriver pd = new ParseDriver();
ASTNode rewrittenTree;
try {
LOG.info("Going to reparse <" + originalQuery + "> as \n<" + rewrittenQueryStr.toString() + ">");
- rewrittenTree = pd.parse(rewrittenQueryStr.toString(), rewrittenCtx);
- rewrittenTree = ParseUtils.findRootNonNullToken(rewrittenTree);
-
+ rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
} catch (ParseException e) {
throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
index 12154c9..aca8354 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/tools/LineageInfo.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.tools;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
@@ -109,8 +111,7 @@ public class LineageInfo implements NodeProcessor {
/*
* Get the AST tree
*/
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(query);
+ ASTNode tree = ParseUtils.parse(query, null);
while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
tree = (ASTNode) tree.getChild(0);
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java
index c659806..c734988 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestMacroSemanticAnalyzer.java
@@ -35,7 +35,6 @@ import org.junit.Test;
public class TestMacroSemanticAnalyzer {
- private ParseDriver parseDriver;
private MacroSemanticAnalyzer analyzer;
private QueryState queryState;
private HiveConf conf;
@@ -47,12 +46,11 @@ public class TestMacroSemanticAnalyzer {
conf = queryState.getConf();
SessionState.start(conf);
context = new Context(conf);
- parseDriver = new ParseDriver();
analyzer = new MacroSemanticAnalyzer(queryState);
}
private ASTNode parse(String command) throws Exception {
- return ParseUtils.findRootNonNullToken(parseDriver.parse(command));
+ return ParseUtils.parse(command);
}
private void analyze(ASTNode ast) throws Exception {
analyzer.analyze(ast, context);
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
index d6fe540..a573808 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
@@ -258,9 +258,7 @@ public class TestUpdateDeleteSemanticAnalyzer {
ctx.setCmd(query);
ctx.setHDFSCleanup(true);
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(query, ctx);
- tree = ParseUtils.findRootNonNullToken(tree);
+ ASTNode tree = ParseUtils.parse(query, ctx);
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
SessionState.get().initTxnMgr(conf);
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/AuthorizationTestUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/AuthorizationTestUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/AuthorizationTestUtil.java
index e8e29ee..d0395dd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/AuthorizationTestUtil.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/authorization/AuthorizationTestUtil.java
@@ -67,7 +67,7 @@ public class AuthorizationTestUtil {
}
private static ASTNode parse(String command) throws Exception {
- return ParseUtils.findRootNonNullToken((new ParseDriver()).parse(command));
+ return ParseUtils.parse(command);
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/test/queries/clientpositive/union_pos_alias.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_pos_alias.q b/ql/src/test/queries/clientpositive/union_pos_alias.q
new file mode 100644
index 0000000..c4eca68
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_pos_alias.q
@@ -0,0 +1,30 @@
+set hive.mapred.mode=nonstrict;
+
+
+explain
+select 'tst1' as key, count(1) as value from src s1
+UNION ALL
+select key, value from (select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) s4
+order by 1;
+
+select 'tst1' as key, count(1) as value from src s1
+UNION ALL
+select key, value from (select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) s4
+order by 1;
+
+drop table src_10;
+create table src_10 as select * from src limit 10;
+
+explain
+select key as value, value as key from src_10
+UNION ALL
+select 'test', value from src_10 s3
+order by 2, 1 desc;
+
+
+select key as value, value as key from src_10
+UNION ALL
+select 'test', value from src_10 s3
+order by 2, 1 desc;
+
+drop table src_10;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/test/results/clientpositive/constant_prop_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constant_prop_1.q.out b/ql/src/test/results/clientpositive/constant_prop_1.q.out
index aaa1dac..3ba1f15 100644
--- a/ql/src/test/results/clientpositive/constant_prop_1.q.out
+++ b/ql/src/test/results/clientpositive/constant_prop_1.q.out
@@ -99,7 +99,7 @@ STAGE PLANS:
Union
Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -115,7 +115,7 @@ STAGE PLANS:
Union
Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
http://git-wip-us.apache.org/repos/asf/hive/blob/78e4bb79/ql/src/test/results/clientpositive/union_pos_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_pos_alias.q.out b/ql/src/test/results/clientpositive/union_pos_alias.q.out
new file mode 100644
index 0000000..8eddbd9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union_pos_alias.q.out
@@ -0,0 +1,308 @@
+PREHOOK: query: explain
+select 'tst1' as key, count(1) as value from src s1
+UNION ALL
+select key, value from (select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) s4
+order by 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 'tst1' as key, count(1) as value from src s1
+UNION ALL
+select key, value from (select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) s4
+order by 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3, Stage-4
+ Stage-3 is a root stage
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'tst1' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ TableScan
+ Union
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'tst2' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s3
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'tst3' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 'tst1' as key, count(1) as value from src s1
+UNION ALL
+select key, value from (select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) s4
+order by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select 'tst1' as key, count(1) as value from src s1
+UNION ALL
+select key, value from (select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) s4
+order by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+tst1 500
+tst2 500
+tst3 500
+PREHOOK: query: drop table src_10
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table src_10
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table src_10 as select * from src limit 10
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_10
+POSTHOOK: query: create table src_10 as select * from src limit 10
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_10
+POSTHOOK: Lineage: src_10.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: src_10.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select key as value, value as key from src_10
+UNION ALL
+select 'test', value from src_10 s3
+order by 2, 1 desc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key as value, value as key from src_10
+UNION ALL
+select 'test', value from src_10 s3
+order by 2, 1 desc
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_10
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: string)
+ sort order: +-
+ Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: s3
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 'test' (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: string)
+ sort order: +-
+ Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key as value, value as key from src_10
+UNION ALL
+select 'test', value from src_10 s3
+order by 2, 1 desc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_10
+#### A masked pattern was here ####
+POSTHOOK: query: select key as value, value as key from src_10
+UNION ALL
+select 'test', value from src_10 s3
+order by 2, 1 desc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_10
+#### A masked pattern was here ####
+test val_165
+165 val_165
+test val_238
+238 val_238
+test val_255
+255 val_255
+test val_27
+27 val_27
+test val_278
+278 val_278
+test val_311
+311 val_311
+test val_409
+409 val_409
+test val_484
+484 val_484
+test val_86
+86 val_86
+test val_98
+98 val_98
+PREHOOK: query: drop table src_10
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_10
+PREHOOK: Output: default@src_10
+POSTHOOK: query: drop table src_10
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_10
+POSTHOOK: Output: default@src_10
[36/50] [abbrv] hive git commit: HIVE-15796: HoS: poor reducer parallelism when operator stats are not accurate (Chao Sun, reviewed by Xuefu Zhang)
Posted by se...@apache.org.
HIVE-15796: HoS: poor reducer parallelism when operator stats are not accurate (Chao Sun, reviewed by Xuefu Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/806d6e1b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/806d6e1b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/806d6e1b
Branch: refs/heads/hive-14535
Commit: 806d6e1b01640e890fa751017d21fc4b107e4f0a
Parents: 8ab1889
Author: Chao Sun <su...@apache.org>
Authored: Fri Feb 17 12:22:45 2017 -0800
Committer: Chao Sun <su...@apache.org>
Committed: Wed Feb 22 09:28:56 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 7 +-
.../test/resources/testconfiguration.properties | 3 +-
.../spark/SetSparkReducerParallelism.java | 79 ++++-
.../hive/ql/parse/spark/GenSparkUtils.java | 24 +-
.../hive/ql/parse/spark/SparkCompiler.java | 23 +-
.../queries/clientpositive/spark_use_op_stats.q | 41 +++
.../spark/spark_use_op_stats.q.out | 331 +++++++++++++++++++
7 files changed, 481 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3777fa9..0b315e1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3202,8 +3202,13 @@ public class HiveConf extends Configuration {
Constants.LLAP_LOGGER_NAME_CONSOLE),
"logger used for llap-daemons."),
+ SPARK_USE_OP_STATS("hive.spark.use.op.stats", true,
+ "Whether to use operator stats to determine reducer parallelism for Hive on Spark. "
+ + "If this is false, Hive will use source table stats to determine reducer "
+ + "parallelism for all first level reduce tasks, and the maximum reducer parallelism "
+ + "from all parents for all the rest (second level and onward) reducer tasks."),
SPARK_USE_FILE_SIZE_FOR_MAPJOIN("hive.spark.use.file.size.for.mapjoin", false,
- "If this is set to true, mapjoin optimization in Hive/Spark will use source file sizes associated"
+ "If this is set to true, mapjoin optimization in Hive/Spark will use source file sizes associated "
+ "with TableScan operator on the root of operator tree, instead of using operator statistics."),
SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout",
"60s", new TimeValidator(TimeUnit.SECONDS),
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 4a69bcc..d344464 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1483,7 +1483,8 @@ spark.only.query.files=spark_combine_equivalent_work.q,\
spark_dynamic_partition_pruning.q,\
spark_dynamic_partition_pruning_2.q,\
spark_vectorized_dynamic_partition_pruning.q,\
- spark_use_file_size_for_mapjoin.q
+ spark_use_file_size_for_mapjoin.q,\
+ spark_use_op_stats.q
miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
bucket4.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
index 7a5b71f..337f418 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.optimizer.spark;
import java.util.List;
+import java.util.Set;
import java.util.Stack;
import org.slf4j.Logger;
@@ -29,7 +30,9 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
@@ -57,6 +60,12 @@ public class SetSparkReducerParallelism implements NodeProcessor {
// Spark memory per task, and total number of cores
private ObjectPair<Long, Integer> sparkMemoryAndCores;
+ private final boolean useOpStats;
+
+ public SetSparkReducerParallelism(HiveConf conf) {
+ sparkMemoryAndCores = null;
+ useOpStats = conf.getBoolVar(HiveConf.ConfVars.SPARK_USE_OP_STATS);
+ }
@Override
public Object process(Node nd, Stack<Node> stack,
@@ -67,16 +76,28 @@ public class SetSparkReducerParallelism implements NodeProcessor {
ReduceSinkOperator sink = (ReduceSinkOperator) nd;
ReduceSinkDesc desc = sink.getConf();
+ Set<ReduceSinkOperator> parentSinks = null;
int maxReducers = context.getConf().getIntVar(HiveConf.ConfVars.MAXREDUCERS);
int constantReducers = context.getConf().getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
+ if (!useOpStats) {
+ parentSinks = OperatorUtils.findOperatorsUpstream(sink, ReduceSinkOperator.class);
+ parentSinks.remove(sink);
+ if (!context.getVisitedReduceSinks().containsAll(parentSinks)) {
+ // We haven't processed all the parent sinks, and we need
+ // them to be done in order to compute the parallelism for this sink.
+ // In this case, skip. We should visit this again from another path.
+ LOG.debug("Skipping sink " + sink + " for now as we haven't seen all its parents.");
+ return false;
+ }
+ }
+
if (context.getVisitedReduceSinks().contains(sink)) {
// skip walking the children
LOG.debug("Already processed reduce sink: " + sink.getName());
return true;
}
-
context.getVisitedReduceSinks().add(sink);
if (needSetParallelism(sink, context.getConf())) {
@@ -96,19 +117,52 @@ public class SetSparkReducerParallelism implements NodeProcessor {
return false;
}
}
+
long numberOfBytes = 0;
- // we need to add up all the estimates from the siblings of this reduce sink
- for (Operator<? extends OperatorDesc> sibling
- : sink.getChildOperators().get(0).getParentOperators()) {
- if (sibling.getStatistics() != null) {
- numberOfBytes += sibling.getStatistics().getDataSize();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics());
+ if (useOpStats) {
+ // we need to add up all the estimates from the siblings of this reduce sink
+ for (Operator<? extends OperatorDesc> sibling
+ : sink.getChildOperators().get(0).getParentOperators()) {
+ if (sibling.getStatistics() != null) {
+ numberOfBytes += sibling.getStatistics().getDataSize();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics());
+ }
+ } else {
+ LOG.warn("No stats available from: " + sibling);
}
- } else {
- LOG.warn("No stats available from: " + sibling);
}
+ } else if (parentSinks.isEmpty()) {
+ // Not using OP stats and this is the first sink in the path, meaning that
+ // we should use TS stats to infer parallelism
+ for (Operator<? extends OperatorDesc> sibling
+ : sink.getChildOperators().get(0).getParentOperators()) {
+ Set<TableScanOperator> sources =
+ OperatorUtils.findOperatorsUpstream(sibling, TableScanOperator.class);
+ for (TableScanOperator source : sources) {
+ if (source.getStatistics() != null) {
+ numberOfBytes += source.getStatistics().getDataSize();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Table source " + source + " has stats: " + source.getStatistics());
+ }
+ } else {
+ LOG.warn("No stats available from table source: " + source);
+ }
+ }
+ }
+ LOG.debug("Gathered stats for sink " + sink + ". Total size is "
+ + numberOfBytes + " bytes.");
+ } else {
+ // Use the maximum parallelism from all parent reduce sinks
+ int numberOfReducers = 0;
+ for (ReduceSinkOperator parent : parentSinks) {
+ numberOfReducers = Math.max(numberOfReducers, parent.getConf().getNumReducers());
+ }
+ desc.setNumReducers(numberOfReducers);
+ LOG.debug("Set parallelism for sink " + sink + " to " + numberOfReducers
+ + " based on its parents");
+ return false;
}
// Divide it by 2 so that we can have more reducers
@@ -134,7 +188,7 @@ public class SetSparkReducerParallelism implements NodeProcessor {
desc.setNumReducers(numReducers);
}
} else {
- LOG.info("Number of reducers determined to be: " + desc.getNumReducers());
+ LOG.info("Number of reducers for sink " + sink + " was already determined to be: " + desc.getNumReducers());
}
return false;
@@ -165,6 +219,9 @@ public class SetSparkReducerParallelism implements NodeProcessor {
}
private void getSparkMemoryAndCores(OptimizeSparkProcContext context) throws SemanticException {
+ if (sparkMemoryAndCores != null) {
+ return;
+ }
if (context.getConf().getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false)) {
// If dynamic allocation is enabled, numbers for memory and cores are meaningless. So, we don't
// try to get it.
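The sizing arithmetic for the table-scan-stats path can be sketched as below.
This is an assumed reconstruction based on the "Divide it by 2" comment above,
not Hive's literal code; the 1009 cap is the default of hive.exec.reducers.max:
    static int estimateReducers(long totalBytes, long bytesPerReducer, int maxReducers) {
      long halved = Math.max(1, bytesPerReducer / 2);  // halved to get more reducers
      long est = (totalBytes + halved - 1) / halved;   // ceiling division
      return (int) Math.min(Math.max(1, est), maxReducers);
    }
With hive.exec.reducers.bytes.per.reducer=500 (set in the new q file below) and
two 5312-byte scans of src feeding one sink, estimateReducers(10624, 500, 1009)
yields 43, matching the "PARTITION-LEVEL SORT, 43" edges in spark_use_op_stats.q.out.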
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
index 36bde30..d0a82af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
@@ -102,21 +102,21 @@ public class GenSparkUtils {
reduceWork.setReducer(root);
reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
- // All parents should be reduce sinks. We pick the one we just walked
- // to choose the number of reducers. In the join/union case they will
- // all be -1. In sort/order case where it matters there will be only
- // one parent.
- Preconditions.checkArgument(context.parentOfRoot instanceof ReduceSinkOperator,
- "AssertionError: expected context.parentOfRoot to be an instance of ReduceSinkOperator, but was "
- + context.parentOfRoot.getClass().getName());
- ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
-
- reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
+ // Pick the maximum # reducers across all parents as the # of reduce tasks.
+ int maxExecutors = -1;
+ for (Operator<? extends OperatorDesc> parentOfRoot : root.getParentOperators()) {
+ Preconditions.checkArgument(parentOfRoot instanceof ReduceSinkOperator,
+ "AssertionError: expected parentOfRoot to be an "
+ + "instance of ReduceSinkOperator, but was "
+ + parentOfRoot.getClass().getName());
+ ReduceSinkOperator reduceSink = (ReduceSinkOperator) parentOfRoot;
+ maxExecutors = Math.max(maxExecutors, reduceSink.getConf().getNumReducers());
+ }
+ reduceWork.setNumReduceTasks(maxExecutors);
+ ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
setupReduceSink(context, reduceWork, reduceSink);
-
sparkWork.add(reduceWork);
-
SparkEdgeProperty edgeProp = getEdgeProperty(reduceSink, reduceWork);
sparkWork.connect(context.preceedingWork, reduceWork, edgeProp);
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index c4b1640..682b987 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.TypeRule;
@@ -117,6 +118,9 @@ public class SparkCompiler extends TaskCompiler {
// Annotation OP tree with statistics
runStatsAnnotation(procCtx);
+ // Set reducer parallelism
+ runSetReducerParallelism(procCtx);
+
// Run Join related optimizations
runJoinOptimizations(procCtx);
@@ -266,12 +270,27 @@ public class SparkCompiler extends TaskCompiler {
}
}
- private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
+ private void runSetReducerParallelism(OptimizeSparkProcContext procCtx) throws SemanticException {
ParseContext pCtx = procCtx.getParseContext();
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
ReduceSinkOperator.getOperatorName() + "%"),
- new SetSparkReducerParallelism());
+ new SetSparkReducerParallelism(pCtx.getConf()));
+
+ // The dispatcher fires the processor corresponding to the closest matching
+ // rule and passes the context along
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+ GraphWalker ogw = new PreOrderWalker(disp);
+
+ // Create a list of topop nodes
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(pCtx.getTopOps().values());
+ ogw.startWalking(topNodes, null);
+ }
+
+ private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
+ ParseContext pCtx = procCtx.getParseContext();
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/ql/src/test/queries/clientpositive/spark_use_op_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/spark_use_op_stats.q b/ql/src/test/queries/clientpositive/spark_use_op_stats.q
new file mode 100644
index 0000000..b559bc0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/spark_use_op_stats.q
@@ -0,0 +1,41 @@
+set hive.mapred.mode=nonstrict;
+set hive.spark.use.op.stats=false;
+set hive.auto.convert.join=false;
+set hive.exec.reducers.bytes.per.reducer=500;
+
+EXPLAIN
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97;
+
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97;
+
+CREATE TEMPORARY TABLE tmp AS
+SELECT * FROM src WHERE key > 50 AND key < 200;
+
+EXPLAIN
+WITH a AS (
+ SELECT src1.key, src2.value
+ FROM tmp src1 JOIN tmp src2 ON (src1.key = src2.key)
+ WHERE src1.key > 100
+),
+b AS (
+ SELECT src1.key, src2.value
+ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+ WHERE src1.key > 150
+)
+SELECT sum(hash(a.key, b.value)) FROM a JOIN b ON a.key = b.key;
+
+WITH a AS (
+ SELECT src1.key, src2.value
+ FROM tmp src1 JOIN tmp src2 ON (src1.key = src2.key)
+ WHERE src1.key > 100
+),
+b AS (
+ SELECT src1.key, src2.value
+ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+ WHERE src1.key > 150
+)
+SELECT sum(hash(a.key, b.value)) FROM a JOIN b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/806d6e1b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out
new file mode 100644
index 0000000..76f9936
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out
@@ -0,0 +1,331 @@
+PREHOOK: query: EXPLAIN
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 43), Map 3 (PARTITION-LEVEL SORT, 43)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) = 97.0) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) = 97.0) (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value
+FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+WHERE src1.key = 97
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+97 val_97
+97 val_97
+97 val_97
+97 val_97
+PREHOOK: query: CREATE TEMPORARY TABLE tmp AS
+SELECT * FROM src WHERE key > 50 AND key < 200
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp
+POSTHOOK: query: CREATE TEMPORARY TABLE tmp AS
+SELECT * FROM src WHERE key > 50 AND key < 200
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp
+PREHOOK: query: EXPLAIN
+WITH a AS (
+ SELECT src1.key, src2.value
+ FROM tmp src1 JOIN tmp src2 ON (src1.key = src2.key)
+ WHERE src1.key > 100
+),
+b AS (
+ SELECT src1.key, src2.value
+ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+ WHERE src1.key > 150
+)
+SELECT sum(hash(a.key, b.value)) FROM a JOIN b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+WITH a AS (
+ SELECT src1.key, src2.value
+ FROM tmp src1 JOIN tmp src2 ON (src1.key = src2.key)
+ WHERE src1.key > 100
+),
+b AS (
+ SELECT src1.key, src2.value
+ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+ WHERE src1.key > 150
+)
+SELECT sum(hash(a.key, b.value)) FROM a JOIN b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 13), Map 5 (PARTITION-LEVEL SORT, 13)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 43), Reducer 7 (PARTITION-LEVEL SORT, 43)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 43), Map 8 (PARTITION-LEVEL SORT, 43)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(key) > 100.0) and (UDFToDouble(key) > 150.0)) (type: boolean)
+ Statistics: Num rows: 16 Data size: 166 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 166 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 166 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(key) > 100.0) and (UDFToDouble(key) > 150.0)) (type: boolean)
+ Statistics: Num rows: 16 Data size: 166 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 166 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 166 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(key) > 150.0) and (UDFToDouble(key) > 100.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(key) > 150.0) and (UDFToDouble(key) > 100.0)) (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 17 Data size: 182 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 17 Data size: 182 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col2) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: WITH a AS (
+ SELECT src1.key, src2.value
+ FROM tmp src1 JOIN tmp src2 ON (src1.key = src2.key)
+ WHERE src1.key > 100
+),
+b AS (
+ SELECT src1.key, src2.value
+ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+ WHERE src1.key > 150
+)
+SELECT sum(hash(a.key, b.value)) FROM a JOIN b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@tmp
+#### A masked pattern was here ####
+POSTHOOK: query: WITH a AS (
+ SELECT src1.key, src2.value
+ FROM tmp src1 JOIN tmp src2 ON (src1.key = src2.key)
+ WHERE src1.key > 100
+),
+b AS (
+ SELECT src1.key, src2.value
+ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
+ WHERE src1.key > 150
+)
+SELECT sum(hash(a.key, b.value)) FROM a JOIN b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@tmp
+#### A masked pattern was here ####
+180817551380
[16/50] [abbrv] hive git commit: HIVE-15970 Fix merge to work in
presence of AST rewrites (Eugene Koifman, reviewed by Wei Zheng)
Posted by se...@apache.org.
HIVE-15970 Fix merge to work in presence of AST rewrites (Eugene Koifman, reviewed by Wei Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a6902ce
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a6902ce
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a6902ce
Branch: refs/heads/hive-14535
Commit: 1a6902ce81c3ac5da98ee5183fa24b98c63642fb
Parents: 3f986d7
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Sat Feb 18 08:58:07 2017 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Sat Feb 18 08:58:07 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/Context.java | 109 +++++++++++++++----
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 38 ++++---
2 files changed, 111 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1a6902ce/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index bbdce63..fae2a12 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -25,7 +25,6 @@ import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
-import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -54,6 +53,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
@@ -132,7 +132,17 @@ public class Context {
* given tree but multi-insert has several and multi-insert representing MERGE must use
* different prefixes to encode the purpose of different Insert branches
*/
- private Map<ASTNode, DestClausePrefix> tree2DestNamePrefix;
+ private Map<Integer, DestClausePrefix> insertBranchToNamePrefix = new HashMap<>();
+ private Operation operation = Operation.OTHER;
+ public void setOperation(Operation operation) {
+ this.operation = operation;
+ }
+
+ /**
+ * These ops require special handling in various places
+ * (note that Insert into Acid table is in the OTHER category)
+ */
+ public enum Operation {UPDATE, DELETE, MERGE, OTHER};
public enum DestClausePrefix {
INSERT("insclause-"), UPDATE("updclause-"), DELETE("delclause-");
private final String prefix;
@@ -143,36 +153,95 @@ public class Context {
return prefix;
}
}
+ private String getMatchedText(ASTNode n) {
+ return getTokenRewriteStream().toString(n.getTokenStartIndex(), n.getTokenStopIndex() + 1).trim();
+ }
/**
* The suffix is always relative to a given ASTNode
*/
public DestClausePrefix getDestNamePrefix(ASTNode curNode) {
- //if there is no mapping, we want to default to "old" naming
assert curNode != null : "must supply curNode";
- if(tree2DestNamePrefix == null || tree2DestNamePrefix.isEmpty()) {
+ if(curNode.getType() != HiveParser.TOK_INSERT_INTO) {
+ //select statement
+ assert curNode.getType() == HiveParser.TOK_DESTINATION;
+ if(operation == Operation.OTHER) {
+ //not an 'interesting' op
+ return DestClausePrefix.INSERT;
+ }
+ //if it is an 'interesting' op but it's a select it must be a sub-query or a derived table
+ //it doesn't require a special Acid code path - the rest of the code here is to ensure
+ //the tree structure is what we expect
+ boolean thisIsInASubquery = false;
+ parentLoop: while(curNode.getParent() != null) {
+ curNode = (ASTNode) curNode.getParent();
+ switch (curNode.getType()) {
+ case HiveParser.TOK_SUBQUERY_EXPR:
+ //this is a real subquery (foo IN (select ...))
+ case HiveParser.TOK_SUBQUERY:
+ //this is a Derived Table (Select * from (select a from ...))
+ //strictly speaking SetOps should have a TOK_SUBQUERY parent so next 6 items are redundant
+ case HiveParser.TOK_UNIONALL:
+ case HiveParser.TOK_UNIONDISTINCT:
+ case HiveParser.TOK_EXCEPTALL:
+ case HiveParser.TOK_EXCEPTDISTINCT:
+ case HiveParser.TOK_INTERSECTALL:
+ case HiveParser.TOK_INTERSECTDISTINCT:
+ thisIsInASubquery = true;
+ break parentLoop;
+ }
+ }
+ if(!thisIsInASubquery) {
+ throw new IllegalStateException("Expected '" + getMatchedText(curNode) + "' to be in sub-query or set operation.");
+ }
return DestClausePrefix.INSERT;
}
- do {
- DestClausePrefix prefix = tree2DestNamePrefix.get(curNode);
- if(prefix != null) {
- return prefix;
- }
- curNode = (ASTNode) curNode.parent;
- } while(curNode != null);
- return DestClausePrefix.INSERT;
+ switch (operation) {
+ case OTHER:
+ return DestClausePrefix.INSERT;
+ case UPDATE:
+ return DestClausePrefix.UPDATE;
+ case DELETE:
+ return DestClausePrefix.DELETE;
+ case MERGE:
+ /* This is the structure expected here
+ HiveParser.TOK_QUERY;
+ HiveParser.TOK_FROM
+ HiveParser.TOK_INSERT;
+ HiveParser.TOK_INSERT_INTO;
+ HiveParser.TOK_INSERT;
+ HiveParser.TOK_INSERT_INTO;
+ .....*/
+ ASTNode insert = (ASTNode) curNode.getParent();
+ assert insert != null && insert.getType() == HiveParser.TOK_INSERT;
+ ASTNode query = (ASTNode) insert.getParent();
+ assert query != null && query.getType() == HiveParser.TOK_QUERY;
+
+ for(int childIdx = 1; childIdx < query.getChildCount(); childIdx++) {//1st child is TOK_FROM
+ assert query.getChild(childIdx).getType() == HiveParser.TOK_INSERT;
+ if(insert == query.getChild(childIdx)) {
+ DestClausePrefix prefix = insertBranchToNamePrefix.get(childIdx);
+ if(prefix == null) {
+ throw new IllegalStateException("Found a node w/o branch mapping: '" +
+ getMatchedText(insert) + "'");
+ }
+ return prefix;
+ }
+ }
+ throw new IllegalStateException("Could not locate '" + getMatchedText(insert) + "'");
+ default:
+ throw new IllegalStateException("Unexpected operation: " + operation);
+ }
}
/**
- * Will make SemanticAnalyzer.Phase1Ctx#dest in subtree rooted at 'tree' use 'prefix'
- * @param tree
+ * Will make SemanticAnalyzer.Phase1Ctx#dest in subtree rooted at 'tree' use 'prefix'. This is to
+ * handle multi-insert stmt that represents Merge stmt and has insert branches representing
+ * update/delete/insert.
+ * @param pos ordinal index of specific TOK_INSERT as child of TOK_QUERY
* @return previous prefix for 'tree' or null
*/
- public DestClausePrefix addDestNamePrefix(ASTNode tree, DestClausePrefix prefix) {
- if(tree2DestNamePrefix == null) {
- tree2DestNamePrefix = new IdentityHashMap<>();
- }
- return tree2DestNamePrefix.put(tree, prefix);
+ public DestClausePrefix addDestNamePrefix(int pos, DestClausePrefix prefix) {
+ return insertBranchToNamePrefix.put(pos, prefix);
}
-
public Context(Configuration conf) throws IOException {
this(conf, generateExecutionId());
}
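Net effect of this Context.java change: dest-clause prefixes are no longer keyed on ASTNode identity (which AST rewrites invalidate) but on the ordinal of the TOK_INSERT branch under TOK_QUERY, plus an Operation hint for the single-branch UPDATE/DELETE case. A hedged usage sketch for a rewritten MERGE, assuming its first two insert branches are the update and delete clauses (child 0 of TOK_QUERY is TOK_FROM):

// Sketch only; mirrors the calls made in UpdateDeleteSemanticAnalyzer below.
rewrittenCtx.setOperation(Context.Operation.MERGE);
rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE); // 1st insert branch
rewrittenCtx.addDestNamePrefix(2, Context.DestClausePrefix.DELETE); // 2nd insert branch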
http://git-wip-us.apache.org/repos/asf/hive/blob/1a6902ce/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 12a43a2..725f2ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -58,9 +58,9 @@ import org.apache.hadoop.hive.ql.session.SessionState;
*/
public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
- boolean useSuper = false;
+ private boolean useSuper = false;
- public UpdateDeleteSemanticAnalyzer(QueryState queryState) throws SemanticException {
+ UpdateDeleteSemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
}
@@ -92,19 +92,19 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
}
}
private boolean updating() {
- return currentOperation == Operation.UPDATE;
+ return currentOperation == Context.Operation.UPDATE;
}
private boolean deleting() {
- return currentOperation == Operation.DELETE;
+ return currentOperation == Context.Operation.DELETE;
}
private void analyzeUpdate(ASTNode tree) throws SemanticException {
- currentOperation = Operation.UPDATE;
+ currentOperation = Context.Operation.UPDATE;
reparseAndSuperAnalyze(tree);
}
private void analyzeDelete(ASTNode tree) throws SemanticException {
- currentOperation = Operation.DELETE;
+ currentOperation = Context.Operation.DELETE;
reparseAndSuperAnalyze(tree);
}
/**
@@ -410,10 +410,12 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
"Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();
if(updating()) {
- rewrittenCtx.addDestNamePrefix(rewrittenInsert, Context.DestClausePrefix.UPDATE);
+ rewrittenCtx.setOperation(Context.Operation.UPDATE);
+ rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
}
else if(deleting()) {
- rewrittenCtx.addDestNamePrefix(rewrittenInsert, Context.DestClausePrefix.DELETE);
+ rewrittenCtx.setOperation(Context.Operation.DELETE);
+ rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
}
if (where != null) {
@@ -489,7 +491,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
return false;
}
private String operation() {
- if (currentOperation == Operation.NOT_ACID) {
+ if (currentOperation == Context.Operation.OTHER) {
throw new IllegalStateException("UpdateDeleteSemanticAnalyzer neither updating nor " +
"deleting, operation not known.");
}
@@ -523,8 +525,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
return colName.toLowerCase();
}
- private enum Operation {UPDATE, DELETE, MERGE, NOT_ACID};
- private Operation currentOperation = Operation.NOT_ACID;
+ private Context.Operation currentOperation = Context.Operation.OTHER;
private static final String Indent = " ";
private IdentifierQuoter quotedIdenfierHelper;
@@ -589,7 +590,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
* @throws SemanticException
*/
private void analyzeMerge(ASTNode tree) throws SemanticException {
- currentOperation = Operation.MERGE;
+ currentOperation = Context.Operation.MERGE;
quotedIdenfierHelper = new IdentifierQuoter(ctx.getTokenRewriteStream());
/*
* See org.apache.hadoop.hive.ql.parse.TestMergeStatement for some examples of the merge AST
@@ -701,8 +702,9 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
Context rewrittenCtx = rr.rewrittenCtx;
ASTNode rewrittenTree = rr.rewrittenTree;
+ rewrittenCtx.setOperation(Context.Operation.MERGE);
- //set dest name mapping on new context
+ //set dest name mapping on new context; 1st child is TOK_FROM
for(int insClauseIdx = 1, whenClauseIdx = 0;
insClauseIdx < rewrittenTree.getChildCount() - (validating ? 1 : 0/*skip cardinality violation clause*/);
insClauseIdx++, whenClauseIdx++) {
@@ -710,18 +712,22 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
case HiveParser.TOK_INSERT:
- rewrittenCtx.addDestNamePrefix(insertClause, Context.DestClausePrefix.INSERT);
+ rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
break;
case HiveParser.TOK_UPDATE:
- rewrittenCtx.addDestNamePrefix(insertClause, Context.DestClausePrefix.UPDATE);
+ rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.UPDATE);
break;
case HiveParser.TOK_DELETE:
- rewrittenCtx.addDestNamePrefix(insertClause, Context.DestClausePrefix.DELETE);
+ rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.DELETE);
break;
default:
assert false;
}
}
+ if(validating) {
+ //reaching here means the last branch of the multi-insert is Cardinality Validation
+ rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1, Context.DestClausePrefix.INSERT);
+ }
try {
useSuper = true;
super.analyze(rewrittenTree, rewrittenCtx);
[15/50] [abbrv] hive git commit: HIVE-15919: Row count mismatch for
count * query (Jason Dere, reviewed by Matt McCline)
Posted by se...@apache.org.
HIVE-15919: Row count mismatch for count * query (Jason Dere, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f986d7a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f986d7a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f986d7a
Branch: refs/heads/hive-14535
Commit: 3f986d7a286d7777aebce92bb6c62d00795d0e8f
Parents: e618bd1
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri Feb 17 14:30:30 2017 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri Feb 17 14:30:30 2017 -0800
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 112 ++++++++++++++-----
.../exec/vector/TestVectorizationContext.java | 37 ++++++
.../llap/vector_decimal_mapjoin.q.out | 4 +-
.../llap/vector_reduce_groupby_decimal.q.out | 2 +-
.../spark/vector_decimal_mapjoin.q.out | 4 +-
.../clientpositive/vector_decimal_mapjoin.q.out | 2 +-
.../vector_reduce_groupby_decimal.q.out | 2 +-
7 files changed, 125 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 8164684..bf78251 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -104,20 +104,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
-import org.apache.hadoop.hive.ql.udf.UDFConv;
-import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime;
-import org.apache.hadoop.hive.ql.udf.UDFHex;
-import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract;
-import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace;
-import org.apache.hadoop.hive.ql.udf.UDFSign;
-import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
-import org.apache.hadoop.hive.ql.udf.UDFToByte;
-import org.apache.hadoop.hive.ql.udf.UDFToDouble;
-import org.apache.hadoop.hive.ql.udf.UDFToFloat;
-import org.apache.hadoop.hive.ql.udf.UDFToInteger;
-import org.apache.hadoop.hive.ql.udf.UDFToLong;
-import org.apache.hadoop.hive.ql.udf.UDFToShort;
-import org.apache.hadoop.hive.ql.udf.UDFToString;
+import org.apache.hadoop.hive.ql.udf.*;
import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
@@ -359,6 +346,67 @@ public class VectorizationContext {
castExpressionUdfs.add(UDFToShort.class);
}
+ // Set of GenericUDFs which require implicit type casting of decimal parameters.
+ // Vectorization for mathematical functions currently depends on decimal params automatically
+ // being converted to the return type (see getImplicitCastExpression()), which is not correct
+ // in the general case. This set restricts automatic type conversion to just these functions.
+ private static Set<Class<?>> udfsNeedingImplicitDecimalCast = new HashSet<Class<?>>();
+ static {
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPPlus.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPMinus.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPMultiply.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPDivide.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPMod.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFRound.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFBRound.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFFloor.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFCbrt.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFCeil.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFAbs.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFPosMod.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFPower.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFFactorial.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPPositive.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPNegative.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFCoalesce.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFElt.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFGreatest.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFLeast.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFIn.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqual.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualNS.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPNotEqual.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPLessThan.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrLessThan.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPGreaterThan.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFOPEqualOrGreaterThan.class);
+ udfsNeedingImplicitDecimalCast.add(GenericUDFBetween.class);
+ udfsNeedingImplicitDecimalCast.add(UDFSqrt.class);
+ udfsNeedingImplicitDecimalCast.add(UDFRand.class);
+ udfsNeedingImplicitDecimalCast.add(UDFLn.class);
+ udfsNeedingImplicitDecimalCast.add(UDFLog2.class);
+ udfsNeedingImplicitDecimalCast.add(UDFSin.class);
+ udfsNeedingImplicitDecimalCast.add(UDFAsin.class);
+ udfsNeedingImplicitDecimalCast.add(UDFCos.class);
+ udfsNeedingImplicitDecimalCast.add(UDFAcos.class);
+ udfsNeedingImplicitDecimalCast.add(UDFLog10.class);
+ udfsNeedingImplicitDecimalCast.add(UDFLog.class);
+ udfsNeedingImplicitDecimalCast.add(UDFExp.class);
+ udfsNeedingImplicitDecimalCast.add(UDFDegrees.class);
+ udfsNeedingImplicitDecimalCast.add(UDFRadians.class);
+ udfsNeedingImplicitDecimalCast.add(UDFAtan.class);
+ udfsNeedingImplicitDecimalCast.add(UDFTan.class);
+ udfsNeedingImplicitDecimalCast.add(UDFOPLongDivide.class);
+ }
+
+ protected boolean needsImplicitCastForDecimal(GenericUDF udf) {
+ Class<?> udfClass = udf.getClass();
+ if (udf instanceof GenericUDFBridge) {
+ udfClass = ((GenericUDFBridge) udf).getUdfClass();
+ }
+ return udfsNeedingImplicitDecimalCast.contains(udfClass);
+ }
+
protected int getInputColumnIndex(String name) throws HiveException {
if (name == null) {
throw new HiveException("Null column name");
@@ -764,24 +812,26 @@ public class VectorizationContext {
}
if (castTypeDecimal && !inputTypeDecimal) {
-
- // Cast the input to decimal
- // If castType is decimal, try not to lose precision for numeric types.
- castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
- GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
- castToDecimalUDF.setTypeInfo(castType);
- List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
- children.add(child);
- ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
- return desc;
+ if (needsImplicitCastForDecimal(udf)) {
+ // Cast the input to decimal
+ // If castType is decimal, try not to lose precision for numeric types.
+ castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
+ GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
+ castToDecimalUDF.setTypeInfo(castType);
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(child);
+ ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
+ return desc;
+ }
} else if (!castTypeDecimal && inputTypeDecimal) {
-
- // Cast decimal input to returnType
- GenericUDF genericUdf = getGenericUDFForCast(castType);
- List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
- children.add(child);
- ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
- return desc;
+ if (needsImplicitCastForDecimal(udf)) {
+ // Cast decimal input to returnType
+ GenericUDF genericUdf = getGenericUDFForCast(castType);
+ List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+ children.add(child);
+ ExprNodeDesc desc = new ExprNodeGenericFuncDesc(castType, genericUdf, children);
+ return desc;
+ }
} else {
// Casts to exact types including long to double etc. are needed in some special cases.
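The effect of the whitelist, sketched: for a whitelisted UDF (e.g. GenericUDFOPPlus) a child is wrapped in a cast to the decimal return type, while for anything else (such as the bloom-filter membership check exercised in the new test below) the child keeps its original type and no lossy conversion is synthesized. This condenses the hunk above; the non-decimal-cast branch and error handling are elided.

if (needsImplicitCastForDecimal(udf)) {
  // whitelisted: synthesize the cast, trying not to lose precision
  castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
  GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
  castToDecimalUDF.setTypeInfo(castType);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(child);
  return new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
}
// not whitelisted: fall through, the child is passed to the UDF unchanged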
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index bb37a04..9fcb392 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -31,11 +31,13 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoub
import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn;
@@ -66,6 +68,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLTrim;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLower;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorInBloomFilterColDynamicValue;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList;
@@ -110,9 +113,11 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColSubtractLong
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColUnaryMinus;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongScalarSubtractLongColumn;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.DynamicValue;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.UDFLog;
import org.apache.hadoop.hive.ql.udf.UDFSin;
@@ -123,6 +128,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLTrim;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
@@ -1584,4 +1590,35 @@ public class TestVectorizationContext {
b = 1;
assertEquals(a != b ? 1 : 0, ((a - b) ^ (b - a)) >>> 63);
}
+
+ @Test
+ public void testInBloomFilter() throws Exception {
+ // Setup InBloomFilter() UDF
+ ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.getDecimalTypeInfo(10, 5), "a", "table", false);
+ ExprNodeDesc bfExpr = new ExprNodeDynamicValueDesc(new DynamicValue("id1", TypeInfoFactory.binaryTypeInfo));
+
+ ExprNodeGenericFuncDesc inBloomFilterExpr = new ExprNodeGenericFuncDesc();
+ GenericUDF inBloomFilterUdf = new GenericUDFInBloomFilter();
+ inBloomFilterExpr.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ inBloomFilterExpr.setGenericUDF(inBloomFilterUdf);
+ List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+ children1.add(colExpr);
+ children1.add(bfExpr);
+ inBloomFilterExpr.setChildren(children1);
+
+ // Setup VectorizationContext
+ List<String> columns = new ArrayList<String>();
+ columns.add("b");
+ columns.add("a");
+ VectorizationContext vc = new VectorizationContext("name", columns);
+
+ // Create vectorized expr
+ VectorExpression ve = vc.getVectorExpression(inBloomFilterExpr, VectorExpressionDescriptor.Mode.FILTER);
+ Assert.assertEquals(VectorInBloomFilterColDynamicValue.class, ve.getClass());
+ VectorInBloomFilterColDynamicValue vectorizedInBloomFilterExpr = (VectorInBloomFilterColDynamicValue) ve;
+ VectorExpression[] children = vectorizedInBloomFilterExpr.getChildExpressions();
+ // VectorInBloomFilterColDynamicValue should have all of the necessary information to vectorize.
+ // There should be no need for child vector expressions, which would imply casting/conversion.
+ Assert.assertNull(children);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
index 29e779d..6275c59 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out
@@ -106,7 +106,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -165,7 +165,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
index e17aff5..a28719f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 2) -> 4:Boolean) -> boolean, SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 3) -> 4:Boolean) -> boolean) -> boolean
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 3) -> boolean) -> boolean
predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean)
Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
index e4c92c7..968e080 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
@@ -104,7 +104,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -150,7 +150,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
index c3c0c4e..223a7a8 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_mapjoin.q.out
@@ -124,7 +124,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:Boolean) -> boolean
+ predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/3f986d7a/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out b/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
index 7094c9c..a6e0c1d 100644
--- a/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
+++ b/ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 2) -> 4:Boolean) -> boolean, SelectColumnIsNotNull(col 4)(children: CastDecimalToBoolean(col 3) -> 4:Boolean) -> boolean) -> boolean
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 3) -> boolean) -> boolean
predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean)
Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: NONE
Group By Operator
[31/50] [abbrv] hive git commit: HIVE-15934 : Downgrade Maven
surefire plugin from 2.19.1 to 2.18.1 (Wei Zheng,
reviewed by Zoltan Haindrich)
Posted by se...@apache.org.
HIVE-15934 : Downgrade Maven surefire plugin from 2.19.1 to 2.18.1 (Wei Zheng, reviewed by Zoltan Haindrich)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ffe73576
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ffe73576
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ffe73576
Branch: refs/heads/hive-14535
Commit: ffe735766efa7ac6469aec0c43cc6fcdfab5662c
Parents: d5bb76c
Author: Wei Zheng <we...@apache.org>
Authored: Tue Feb 21 15:46:18 2017 -0800
Committer: Wei Zheng <we...@apache.org>
Committed: Tue Feb 21 15:46:18 2017 -0800
----------------------------------------------------------------------
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ffe73576/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3c46ad1..3ddec7a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,7 +99,7 @@
<maven.jar.plugin.version>2.4</maven.jar.plugin.version>
<maven.javadoc.plugin.version>2.4</maven.javadoc.plugin.version>
<maven.shade.plugin.version>2.4.3</maven.shade.plugin.version>
- <maven.surefire.plugin.version>2.19.1</maven.surefire.plugin.version>
+ <maven.surefire.plugin.version>2.18.1</maven.surefire.plugin.version>
<maven.war.plugin.version>2.4</maven.war.plugin.version>
<maven.dependency.plugin.version>2.8</maven.dependency.plugin.version>
<maven.eclipse.plugin.version>2.9</maven.eclipse.plugin.version>
[48/50] [abbrv] hive git commit: HIVE-16017 : MM tables - many
queries duplicate the data after master merge (Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-16017 : MM tables - many queries duplicate the data after master merge (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1f0a5ef3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1f0a5ef3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1f0a5ef3
Branch: refs/heads/hive-14535
Commit: 1f0a5ef31c450db497052300a44a1080add47c6a
Parents: 74d9333
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Feb 23 16:54:06 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Feb 23 16:54:06 2017 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/Utilities.java | 2 +-
ql/src/test/results/clientpositive/mm_all.q.out | 82 ++++++--------------
2 files changed, 26 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1f0a5ef3/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 80ca28d..d7db991 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -3187,7 +3187,7 @@ public final class Utilities {
continue;
}
- pathsProcessed.add(path);
+ pathsProcessed.add(file);
if (LOG.isDebugEnabled()) {
LOG.debug("Adding input file " + file);
} else if (!hasLogged) {
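One plausible reading of this one-liner: the de-duplication set was recording the enclosing directory (path) rather than the individual input file, so the guard never matched sibling files and inputs were added more than once, duplicating data. A hypothetical illustration of the corrected pattern; the helper and collector names here are invented for the sketch:

Set<Path> pathsProcessed = new HashSet<>();
for (Path file : listInputFiles(path)) {   // listInputFiles: hypothetical helper
  if (pathsProcessed.contains(file)) {
    continue;                              // this exact file was already added
  }
  pathsProcessed.add(file);                // was: pathsProcessed.add(path)
  inputPaths.add(file);                    // inputPaths: hypothetical collector
}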
http://git-wip-us.apache.org/repos/asf/hive/blob/1f0a5ef3/ql/src/test/results/clientpositive/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mm_all.q.out b/ql/src/test/results/clientpositive/mm_all.q.out
index dd0a3b1..4944d6c 100644
--- a/ql/src/test/results/clientpositive/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/mm_all.q.out
@@ -1,8 +1,6 @@
-PREHOOK: query: -- Force multiple writers when reading
-drop table intermediate
+PREHOOK: query: drop table intermediate
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- Force multiple writers when reading
-drop table intermediate
+POSTHOOK: query: drop table intermediate
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc
PREHOOK: type: CREATETABLE
@@ -205,15 +203,13 @@ POSTHOOK: Input: default@part_mm@key_mm=456
103 455
103 455
103 456
-PREHOOK: query: -- TODO: doesn't work truncate table part_mm partition(key_mm=455);
-select * from part_mm order by key, key_mm
+PREHOOK: query: select * from part_mm order by key, key_mm
PREHOOK: type: QUERY
PREHOOK: Input: default@part_mm
PREHOOK: Input: default@part_mm@key_mm=455
PREHOOK: Input: default@part_mm@key_mm=456
#### A masked pattern was here ####
-POSTHOOK: query: -- TODO: doesn't work truncate table part_mm partition(key_mm=455);
-select * from part_mm order by key, key_mm
+POSTHOOK: query: select * from part_mm order by key, key_mm
POSTHOOK: type: QUERY
POSTHOOK: Input: default@part_mm
POSTHOOK: Input: default@part_mm@key_mm=455
@@ -378,11 +374,9 @@ POSTHOOK: query: drop table simple_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@simple_mm
POSTHOOK: Output: default@simple_mm
-PREHOOK: query: -- simple DP (no bucketing)
-drop table dp_mm
+PREHOOK: query: drop table dp_mm
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- simple DP (no bucketing)
-drop table dp_mm
+POSTHOOK: query: drop table dp_mm
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc
tblproperties ("transactional"="true", "transactional_properties"="insert_only")
@@ -453,15 +447,11 @@ POSTHOOK: query: drop table dp_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@dp_mm
POSTHOOK: Output: default@dp_mm
-PREHOOK: query: -- union
-
-create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@union_mm
-POSTHOOK: query: -- union
-
-create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only")
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@union_mm
@@ -949,8 +939,8 @@ POSTHOOK: Input: default@skew_dp_union_mm@k3=98
97 97 97 97
98 98 98 98
98 99 100 101
-100 100 100 100
99 100 101 102
+100 100 100 100
101 102 103 104
103 103 103 103
104 105 106 107
@@ -1175,9 +1165,9 @@ POSTHOOK: Input: default@merge1_mm@key=103
POSTHOOK: Input: default@merge1_mm@key=97
POSTHOOK: Input: default@merge1_mm@key=98
#### A masked pattern was here ####
-98 98
97 97
103 103
+98 98
100 100
10 10
0 0
@@ -1227,17 +1217,17 @@ POSTHOOK: Input: default@merge1_mm@key=97
POSTHOOK: Input: default@merge1_mm@key=98
#### A masked pattern was here ####
100 100
-100 100
97 97
103 103
103 103
+100 100
97 97
98 98
98 98
-10 10
-10 10
0 0
+10 10
0 0
+10 10
PREHOOK: query: drop table merge1_mm
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@merge1_mm
@@ -1246,15 +1236,9 @@ POSTHOOK: query: drop table merge1_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@merge1_mm
POSTHOOK: Output: default@merge1_mm
-PREHOOK: query: -- TODO: need to include merge+union+DP, but it's broken for now
-
-
-drop table ctas0_mm
+PREHOOK: query: drop table ctas0_mm
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- TODO: need to include merge+union+DP, but it's broken for now
-
-
-drop table ctas0_mm
+POSTHOOK: query: drop table ctas0_mm
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate
PREHOOK: type: CREATETABLE_AS_SELECT
@@ -2162,13 +2146,9 @@ POSTHOOK: query: drop table intermmediate_nonpart
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@intermmediate_nonpart
POSTHOOK: Output: default@intermmediate_nonpart
-PREHOOK: query: -- non-MM export to MM table, with and without partitions
-
-drop table import0_mm
+PREHOOK: query: drop table import0_mm
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- non-MM export to MM table, with and without partitions
-
-drop table import0_mm
+POSTHOOK: query: drop table import0_mm
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table import0_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
PREHOOK: type: CREATETABLE
@@ -2261,13 +2241,9 @@ POSTHOOK: query: drop table import1_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@import1_mm
POSTHOOK: Output: default@import1_mm
-PREHOOK: query: -- MM export into new MM table, non-part and part
-
-drop table import2_mm
+PREHOOK: query: drop table import2_mm
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- MM export into new MM table, non-part and part
-
-drop table import2_mm
+POSTHOOK: query: drop table import2_mm
POSTHOOK: type: DROPTABLE
PREHOOK: query: import table import2_mm from 'ql/test/data/exports/intermmediate_nonpart'
PREHOOK: type: IMPORT
@@ -2365,13 +2341,9 @@ POSTHOOK: query: drop table import3_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@import3_mm
POSTHOOK: Output: default@import3_mm
-PREHOOK: query: -- MM export into existing MM table, non-part and partial part
-
-drop table import4_mm
+PREHOOK: query: drop table import4_mm
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- MM export into existing MM table, non-part and partial part
-
-drop table import4_mm
+POSTHOOK: query: drop table import4_mm
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table import4_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only")
PREHOOK: type: CREATETABLE
@@ -2452,13 +2424,9 @@ POSTHOOK: query: drop table import5_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@import5_mm
POSTHOOK: Output: default@import5_mm
-PREHOOK: query: -- MM export into existing non-MM table, non-part and part
-
-drop table import6_mm
+PREHOOK: query: drop table import6_mm
PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- MM export into existing non-MM table, non-part and part
-
-drop table import6_mm
+POSTHOOK: query: drop table import6_mm
POSTHOOK: type: DROPTABLE
PREHOOK: query: create table import6_mm(key int, p int)
PREHOOK: type: CREATETABLE
@@ -3136,7 +3104,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: database:default
POSTHOOK: Output: default@stats2_mm
-POSTHOOK: Lineage: stats2_mm.c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: stats2_mm._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: desc formatted stats2_mm
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@stats2_mm
@@ -3145,7 +3113,7 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@stats2_mm
# col_name data_type comment
-c0 array<string>
+_c0 array<string>
# Detailed Table Information
Database: default
[14/50] [abbrv] hive git commit: HIVE-15969 : Failures in
TestRemoteHiveMetaStore,
TestSetUGIOnOnlyServer (Slim Bouguerra via Thejas Nair)
Posted by se...@apache.org.
HIVE-15969 : Failures in TestRemoteHiveMetaStore, TestSetUGIOnOnlyServer (Slim Bouguerra via Thejas Nair)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e618bd1b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e618bd1b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e618bd1b
Branch: refs/heads/hive-14535
Commit: e618bd1b45bc45f96b27698fd0c97fee6b9c57e6
Parents: 5c29371
Author: Slim Bouguerra <sl...@gmail.com>
Authored: Fri Feb 17 14:15:15 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Feb 17 14:15:15 2017 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/metastore/HiveMetaStoreClient.java | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e618bd1b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 70f3a6b..7002620 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -739,12 +739,14 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
hook.commitCreateTable(tbl);
}
success = true;
- } catch (Exception e){
- LOG.error("Got exception from createTable", e);
}
finally {
if (!success && (hook != null)) {
- hook.rollbackCreateTable(tbl);
+ try {
+ hook.rollbackCreateTable(tbl);
+ } catch (Exception e){
+ LOG.error("Create rollback failed with", e);
+ }
}
}
}
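For context, the change above stops createTable from swallowing the original exception and instead guards only the rollback: the create failure now propagates to the caller, while a failure inside the best-effort rollback is logged rather than masking it. A minimal, self-contained sketch of that pattern follows; MetaClient, Hook, and Thrift are stand-in names for illustration, not the actual Hive types.

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    class MetaClient {
      private static final Logger LOG = LoggerFactory.getLogger(MetaClient.class);

      // Stand-ins for the Thrift client and metahook interfaces.
      interface Thrift { void create_table(Object tbl) throws Exception; }
      interface Hook {
        void commitCreateTable(Object tbl) throws Exception;
        void rollbackCreateTable(Object tbl) throws Exception;
      }

      private final Thrift thriftClient;
      MetaClient(Thrift thriftClient) { this.thriftClient = thriftClient; }

      void createTable(Hook hook, Object tbl) throws Exception {
        boolean success = false;
        try {
          thriftClient.create_table(tbl);   // may throw; the exception now reaches the caller
          if (hook != null) {
            hook.commitCreateTable(tbl);
          }
          success = true;
        } finally {
          if (!success && hook != null) {
            try {
              hook.rollbackCreateTable(tbl);               // best-effort cleanup
            } catch (Exception e) {
              LOG.error("Create rollback failed with", e); // log, don't mask the create failure
            }
          }
        }
      }
    }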
[50/50] [abbrv] hive git commit: HIVE-14671 : merge master into hive-14535 (Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-14671 : merge master into hive-14535 (Sergey Shelukhin)
Conflicts:
metastore/scripts/upgrade/mssql/upgrade-2.1.0-to-2.2.0.mssql.sql
metastore/scripts/upgrade/mysql/upgrade-2.1.0-to-2.2.0.mysql.sql
metastore/scripts/upgrade/oracle/upgrade-2.1.0-to-2.2.0.oracle.sql
metastore/scripts/upgrade/postgres/upgrade-2.1.0-to-2.2.0.postgres.sql
ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2014ece9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2014ece9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2014ece9
Branch: refs/heads/hive-14535
Commit: 2014ece97960f8d2f690f55c131d9a61c421c2f9
Parents: 1f0a5ef b8d7192
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Feb 23 17:08:03 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Feb 23 17:08:03 2017 -0800
----------------------------------------------------------------------
ant/pom.xml | 69 -
.../hive/ant/DistinctElementsClassPath.java | 90 -
.../apache/hadoop/hive/ant/GenVectorCode.java | 3309 --------------
.../hadoop/hive/ant/GenVectorTestCode.java | 261 --
.../apache/hadoop/hive/ant/GetVersionPref.java | 94 -
ant/src/org/apache/hadoop/hive/ant/antlib.xml | 24 -
.../java/org/apache/hive/beeline/BeeLine.java | 42 +-
.../hive/beeline/BeeLineSignalHandler.java | 1 -
.../java/org/apache/hive/beeline/Commands.java | 2 +-
.../apache/hive/beeline/SunSignalHandler.java | 13 +-
beeline/src/main/resources/BeeLine.properties | 1 +
.../apache/hive/beeline/TestHiveSchemaTool.java | 73 +
.../apache/hive/beeline/TestShutdownHook.java | 46 +
bin/beeline | 5 -
bin/beeline.cmd | 66 -
bin/derbyserver.cmd | 60 -
bin/ext/cleardanglingscratchdir.cmd | 34 -
bin/ext/cli.cmd | 58 -
bin/ext/cli.sh | 8 -
bin/ext/debug.cmd | 110 -
bin/ext/hbaseimport.cmd | 35 -
bin/ext/help.cmd | 30 -
bin/ext/hiveserver2.cmd | 139 -
bin/ext/jar.cmd | 43 -
bin/ext/lineage.cmd | 30 -
bin/ext/metastore.cmd | 47 -
bin/ext/orcfiledump.cmd | 35 -
bin/ext/rcfilecat.cmd | 34 -
bin/ext/schemaTool.cmd | 33 -
bin/ext/util/execHiveCmd.cmd | 24 -
bin/hive | 7 +
bin/hive-config.cmd | 61 -
bin/hive.cmd | 383 --
bin/hplsql.cmd | 58 -
.../hadoop/hive/cli/TestCliDriverMethods.java | 28 -
.../apache/hadoop/hive/common/FileUtils.java | 9 -
.../hadoop/hive/common/StatsSetupConst.java | 223 +-
.../hadoop/hive/common/log/InPlaceUpdate.java | 1 +
.../org/apache/hadoop/hive/conf/HiveConf.java | 61 +-
.../common/util/ACLConfigurationParser.java | 167 +
.../java/org/apache/hive/http/HttpServer.java | 2 +-
.../hadoop/hive/common/TestStatsSetupConst.java | 54 +
.../apache/hadoop/hive/conf/TestHiveConf.java | 5 -
.../common/util/TestACLConfigurationParser.java | 99 +
.../hadoop/hive/contrib/mr/TestGenericMR.java | 13 +-
data/conf/hive-site.xml | 2 +-
druid-handler/pom.xml | 33 +-
.../hadoop/hive/druid/DruidStorageHandler.java | 7 +-
.../hive/druid/DruidStorageHandlerUtils.java | 64 +-
.../druid/io/DruidQueryBasedInputFormat.java | 198 +-
.../hadoop/hive/druid/io/HiveDruidSplit.java | 30 +-
.../druid/serde/DruidQueryRecordReader.java | 3 +-
.../hadoop/hive/druid/serde/DruidSerDe.java | 18 +-
.../TestHiveDruidQueryBasedInputFormat.java | 159 +
.../hive/ql/io/DruidRecordWriterTest.java | 2 +-
hcatalog/bin/templeton.cmd | 90 -
.../hive/hcatalog/mapreduce/HCatBaseTest.java | 4 -
.../mapreduce/TestHCatPartitionPublish.java | 11 +-
.../pig/TestHCatLoaderComplexSchema.java | 5 -
.../hcatalog/pig/TestHCatLoaderEncryption.java | 11 +-
.../listener/DbNotificationListener.java | 1 +
.../e2e/templeton/drivers/TestDriverCurl.pm | 1984 ++++----
.../hive/hcatalog/api/TestHCatClient.java | 7 -
.../hcatalog/templeton/ExecServiceImpl.java | 48 +-
.../hive/hcatalog/templeton/HiveDelegator.java | 9 +-
.../hive/hcatalog/templeton/JarDelegator.java | 9 +-
.../hive/hcatalog/templeton/PigDelegator.java | 5 +-
.../hive/hcatalog/templeton/SqoopDelegator.java | 7 +-
.../hcatalog/templeton/StreamingDelegator.java | 4 +-
.../hcatalog/templeton/tool/LaunchMapper.java | 9 -
.../hcatalog/templeton/tool/TempletonUtils.java | 72 +-
.../main/java/org/apache/hive/hplsql/Exec.java | 8 +-
.../listener/TestDbNotificationListener.java | 82 +
itests/hive-blobstore/pom.xml | 6 -
.../org/apache/hive/jdbc/miniHS2/MiniHS2.java | 5 +-
.../hive/ql/TestReplicationScenarios.java | 123 +-
.../security/StorageBasedMetastoreTestBase.java | 4 -
.../ql/session/TestClearDanglingScratchDir.java | 4 -
.../hive/beeline/TestBeeLineWithArgs.java | 169 +-
.../TestOperationLoggingAPIWithTez.java | 2 -
.../server/TestHS2ClearDanglingScratchDir.java | 4 -
itests/qtest-accumulo/pom.xml | 6 -
itests/qtest-spark/pom.xml | 6 -
itests/qtest/pom.xml | 6 -
.../test/resources/testconfiguration.properties | 7 +-
.../org/apache/hadoop/hive/ql/QTestUtil.java | 55 +-
jdbc/pom.xml | 23 +-
.../org/apache/hive/jdbc/HiveBaseResultSet.java | 297 +-
.../apache/hive/jdbc/HiveCallableStatement.java | 421 +-
.../org/apache/hive/jdbc/HiveConnection.java | 64 +-
.../org/apache/hive/jdbc/HiveDataSource.java | 12 +-
.../apache/hive/jdbc/HiveDatabaseMetaData.java | 215 +-
.../apache/hive/jdbc/HivePreparedStatement.java | 73 +-
.../apache/hive/jdbc/HiveQueryResultSet.java | 7 +-
.../apache/hive/jdbc/HiveResultSetMetaData.java | 19 +-
.../org/apache/hive/jdbc/HiveStatement.java | 32 +-
.../org/apache/hive/jdbc/HiveStatementTest.java | 14 +-
.../llap/registry/impl/LlapRegistryService.java | 13 +-
.../impl/LlapZookeeperRegistryImpl.java | 11 +-
.../hive/llap/security/SecretManager.java | 19 +-
.../org/apache/hadoop/hive/llap/LlapDump.java | 30 +-
.../hive/llap/cache/SerDeLowLevelCacheImpl.java | 13 +-
.../hive/llap/cli/LlapOptionsProcessor.java | 28 +-
.../hadoop/hive/llap/cli/LlapServiceDriver.java | 290 +-
.../hadoop/hive/llap/cli/LlapSliderUtils.java | 188 +
.../hive/llap/cli/LlapStatusServiceDriver.java | 15 +-
.../hive/llap/daemon/impl/AMReporter.java | 5 +-
.../llap/daemon/impl/ContainerRunnerImpl.java | 16 +-
.../hive/llap/daemon/impl/LlapDaemon.java | 57 +-
.../hadoop/hive/llap/daemon/impl/QueryInfo.java | 29 +
.../hive/llap/daemon/impl/QueryTracker.java | 4 +-
.../llap/daemon/impl/TaskRunnerCallable.java | 31 +-
.../hive/llap/io/api/impl/LlapRecordReader.java | 11 +-
.../llap/io/decode/OrcEncodedDataConsumer.java | 22 +-
.../llap/io/encoded/OrcEncodedDataReader.java | 6 +-
.../llap/io/encoded/SerDeEncodedDataReader.java | 882 ++--
.../io/encoded/VectorDeserializeOrcWriter.java | 458 ++
.../io/encoded/VertorDeserializeOrcWriter.java | 261 --
.../llap/shufflehandler/ShuffleHandler.java | 5 +-
.../resources/llap-daemon-log4j2.properties | 14 +-
llap-server/src/main/resources/package.py | 29 +-
llap-server/src/main/resources/templates.py | 3 +-
.../hive/llap/daemon/MiniLlapCluster.java | 38 +-
.../daemon/impl/TaskExecutorTestHelpers.java | 4 +-
.../llap/tezplugins/LlapTaskCommunicator.java | 81 +-
.../tezplugins/LlapTaskSchedulerService.java | 16 +
.../tezplugins/helpers/SourceStateTracker.java | 44 +-
metastore/if/hive_metastore.thrift | 1 +
.../upgrade/derby/038-HIVE-10562.derby.sql | 11 +
.../upgrade/derby/hive-schema-2.2.0.derby.sql | 2 +-
.../derby/upgrade-2.1.0-to-2.2.0.derby.sql | 1 +
.../upgrade/mssql/023-HIVE-10562.mssql.sql | 1 +
.../upgrade/mssql/hive-schema-2.2.0.mssql.sql | 1 +
.../mssql/upgrade-2.1.0-to-2.2.0.mssql.sql | 1 +
.../upgrade/mysql/038-HIVE-10562.mysql.sql | 6 +
.../upgrade/mysql/hive-schema-2.2.0.mysql.sql | 3 +-
.../mysql/upgrade-2.1.0-to-2.2.0.mysql.sql | 1 +
.../upgrade/oracle/038-HIVE-10562.oracle.sql | 2 +
.../upgrade/oracle/hive-schema-2.2.0.oracle.sql | 3 +-
.../oracle/upgrade-2.1.0-to-2.2.0.oracle.sql | 1 +
.../postgres/037-HIVE-10562.postgres.sql | 1 +
.../postgres/hive-schema-2.2.0.postgres.sql | 1 +
.../upgrade-2.1.0-to-2.2.0.postgres.sql | 1 +
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 22 +
.../gen/thrift/gen-cpp/hive_metastore_types.h | 12 +-
.../hive/metastore/api/NotificationEvent.java | 114 +-
.../src/gen/thrift/gen-php/metastore/Types.php | 23 +
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 15 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 4 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 38 +-
.../hive/metastore/HiveMetaStoreClient.java | 9 +-
.../hadoop/hive/metastore/ObjectStore.java | 2 +
.../hive/metastore/RetryingMetaStoreClient.java | 4 +-
.../hive/metastore/messaging/EventUtils.java | 28 +-
.../metastore/messaging/MessageFactory.java | 5 -
.../messaging/json/JSONMessageFactory.java | 7 +-
.../hive/metastore/model/MNotificationLog.java | 9 +
metastore/src/model/package.jdo | 3 +
pom.xml | 50 +-
ql/pom.xml | 4 +-
.../FilterColumnBetweenDynamicValue.txt | 11 +-
.../java/org/apache/hadoop/hive/ql/Context.java | 121 +-
.../java/org/apache/hadoop/hive/ql/Driver.java | 9 +-
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 3 +
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 10 +-
.../hadoop/hive/ql/exec/OperatorUtils.java | 34 +
.../hadoop/hive/ql/exec/ScriptOperator.java | 17 -
.../apache/hadoop/hive/ql/exec/Utilities.java | 38 +-
.../hive/ql/exec/mr/HadoopJobExecHelper.java | 6 +-
.../persistence/BytesBytesMultiHashMap.java | 10 +-
.../hive/ql/exec/spark/GroupByShuffler.java | 11 +-
.../hive/ql/exec/spark/HiveReduceFunction.java | 10 +-
.../spark/HiveReduceFunctionResultList.java | 18 +-
.../ql/exec/spark/HiveSparkClientFactory.java | 5 +
.../hadoop/hive/ql/exec/spark/ReduceTran.java | 8 +-
.../hive/ql/exec/spark/RepartitionShuffler.java | 42 +
.../hive/ql/exec/spark/SortByShuffler.java | 2 +-
.../hive/ql/exec/spark/SparkPlanGenerator.java | 6 +-
.../ql/exec/spark/SparkReduceRecordHandler.java | 56 +-
.../hive/ql/exec/spark/SparkShuffler.java | 4 +-
.../spark/status/RemoteSparkJobMonitor.java | 7 +-
.../ql/exec/spark/status/SparkJobMonitor.java | 12 +
.../spark/status/impl/RemoteSparkJobStatus.java | 8 +
.../hive/ql/exec/tez/TezJobExecHelper.java | 29 +-
.../hive/ql/exec/tez/TezSessionPoolManager.java | 2 +-
.../hive/ql/exec/tez/TezSessionState.java | 27 +
.../apache/hadoop/hive/ql/exec/tez/TezTask.java | 45 +-
.../apache/hadoop/hive/ql/exec/tez/Utils.java | 6 +-
.../hive/ql/exec/tez/monitoring/DAGSummary.java | 12 +-
.../ql/exec/tez/monitoring/RenderStrategy.java | 154 +
.../ql/exec/tez/monitoring/TezJobMonitor.java | 115 +-
.../ql/exec/vector/VectorizationContext.java | 114 +-
.../fast/VectorMapJoinFastBytesHashMap.java | 4 +-
.../VectorMapJoinFastBytesHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastBytesHashSet.java | 4 +-
.../fast/VectorMapJoinFastBytesHashTable.java | 8 +-
.../mapjoin/fast/VectorMapJoinFastHashMap.java | 4 +-
.../fast/VectorMapJoinFastHashMultiSet.java | 4 +-
.../mapjoin/fast/VectorMapJoinFastHashSet.java | 4 +-
.../fast/VectorMapJoinFastHashTable.java | 20 +-
.../fast/VectorMapJoinFastLongHashMap.java | 4 +-
.../fast/VectorMapJoinFastLongHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastLongHashSet.java | 4 +-
.../fast/VectorMapJoinFastLongHashTable.java | 8 +-
.../fast/VectorMapJoinFastMultiKeyHashMap.java | 4 +-
.../VectorMapJoinFastMultiKeyHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastMultiKeyHashSet.java | 4 +-
.../fast/VectorMapJoinFastStringHashMap.java | 4 +-
.../VectorMapJoinFastStringHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastStringHashSet.java | 4 +-
.../fast/VectorMapJoinFastTableContainer.java | 27 +-
.../apache/hadoop/hive/ql/hooks/ATSHook.java | 17 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 39 +
.../hive/ql/io/orc/encoded/CacheChunk.java | 3 +-
.../orc/encoded/EncodedTreeReaderFactory.java | 497 ++-
.../hadoop/hive/ql/io/orc/encoded/Reader.java | 69 +-
.../vector/VectorizedParquetRecordReader.java | 8 +-
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 28 +-
.../hadoop/hive/ql/lockmgr/DbTxnManager.java | 144 +-
.../metadata/HiveMaterializedViewsRegistry.java | 3 +-
.../hive/ql/metadata/HiveMetaStoreChecker.java | 45 +-
.../AnnotateReduceSinkOutputOperator.java | 73 +
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 112 +-
.../DynamicPartitionPruningOptimization.java | 36 +-
.../hadoop/hive/ql/optimizer/Optimizer.java | 4 +
.../SparkRemoveDynamicPruningBySize.java | 4 +-
.../calcite/CalciteViewSemanticException.java | 52 +
.../calcite/rules/HiveRelDecorrelator.java | 724 +--
.../calcite/translator/ASTBuilder.java | 38 +-
.../translator/SqlFunctionConverter.java | 33 +-
.../index/RewriteParseContextGenerator.java | 4 +-
.../spark/SetSparkReducerParallelism.java | 79 +-
.../optimizer/spark/SparkMapJoinOptimizer.java | 34 +-
.../stats/annotation/StatsRulesProcFactory.java | 44 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 147 +-
.../ql/parse/ColumnStatsAutoGatherContext.java | 4 +-
.../ql/parse/ColumnStatsSemanticAnalyzer.java | 4 +-
.../apache/hadoop/hive/ql/parse/HiveParser.g | 11 +-
.../hadoop/hive/ql/parse/IdentifiersParser.g | 162 +-
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 190 +-
.../org/apache/hadoop/hive/ql/parse/QB.java | 4 +
.../ql/parse/ReplicationSemanticAnalyzer.java | 9 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 47 +-
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 62 +-
.../hive/ql/parse/spark/GenSparkUtils.java | 60 +-
.../hive/ql/parse/spark/SparkCompiler.java | 26 +-
.../hadoop/hive/ql/plan/ReduceSinkDesc.java | 14 +
.../hadoop/hive/ql/session/SessionState.java | 36 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 6 +-
.../hadoop/hive/ql/tools/LineageInfo.java | 5 +-
.../hive/ql/txn/compactor/CompactorMR.java | 2 +-
.../ql/udf/generic/GenericUDAFBloomFilter.java | 28 +-
.../generic/GenericUDAFPercentileApprox.java | 2 +
.../hive/ql/udf/generic/GenericUDFBasePad.java | 2 +-
.../hive/ql/udf/generic/GenericUDFLpad.java | 9 +-
.../hive/ql/udf/generic/GenericUDFRpad.java | 8 +-
.../ql/udf/generic/GenericUDFSQCountCheck.java | 11 +-
.../apache/hadoop/hive/ql/util/DosToUnix.java | 107 -
.../hadoop/hive/ql/util/ResourceDownloader.java | 12 +-
.../apache/hadoop/hive/ql/TestTxnCommands.java | 22 +
.../apache/hadoop/hive/ql/WindowsPathUtil.java | 57 -
.../hadoop/hive/ql/exec/TestExecDriver.java | 8 +-
.../hadoop/hive/ql/exec/tez/TestTezTask.java | 6 +
.../exec/vector/TestVectorizationContext.java | 37 +
.../fast/TestVectorMapJoinFastBytesHashMap.java | 56 +-
.../TestVectorMapJoinFastBytesHashMultiSet.java | 14 +-
.../fast/TestVectorMapJoinFastBytesHashSet.java | 14 +-
.../fast/TestVectorMapJoinFastLongHashMap.java | 50 +-
.../TestVectorMapJoinFastLongHashMultiSet.java | 14 +-
.../fast/TestVectorMapJoinFastLongHashSet.java | 14 +-
.../fast/TestVectorMapJoinFastRowHashMap.java | 48 +-
.../io/parquet/TestVectorizedColumnReader.java | 21 +
.../parquet/VectorizedColumnReaderTestBase.java | 2 +-
.../ql/metadata/TestHiveMetaStoreChecker.java | 4 -
.../ql/parse/TestMacroSemanticAnalyzer.java | 4 +-
.../hive/ql/parse/TestParseDriverIntervals.java | 2 +-
.../hadoop/hive/ql/parse/TestQBSubQuery.java | 2 +-
.../parse/TestUpdateDeleteSemanticAnalyzer.java | 4 +-
.../authorization/AuthorizationTestUtil.java | 2 +-
.../hadoop/hive/ql/session/TestAddResource.java | 8 +-
.../hive/ql/txn/compactor/CompactorTest.java | 11 +-
.../hive/ql/txn/compactor/TestCleaner.java | 6 +
.../hive/ql/txn/compactor/TestInitiator.java | 5 +
.../hive/ql/txn/compactor/TestWorker.java | 6 +
.../hive/ql/udf/generic/TestGenericUDFLpad.java | 9 +-
.../hive/ql/udf/generic/TestGenericUDFRpad.java | 11 +-
.../hadoop/hive/ql/util/TestDosToUnix.java | 77 -
.../clientnegative/masking_acid_delete.q | 10 +
.../queries/clientnegative/masking_acid_merge.q | 15 +
.../clientnegative/masking_acid_update.q | 10 +
.../clientnegative/subquery_corr_in_agg.q | 8 +
ql/src/test/queries/clientnegative/udf_in_2.q | 2 +
.../queries/clientpositive/avro_timestamp.q | 2 -
.../queries/clientpositive/avro_timestamp_win.q | 28 -
.../queries/clientpositive/cbo_rp_auto_join1.q | 2 +-
ql/src/test/queries/clientpositive/combine2.q | 3 -
.../queries/clientpositive/combine2_hadoop20.q | 3 -
.../test/queries/clientpositive/combine2_win.q | 41 -
.../dynamic_semijoin_reduction_2.q | 41 +
ql/src/test/queries/clientpositive/escape1.q | 3 -
ql/src/test/queries/clientpositive/escape2.q | 3 -
.../clientpositive/explain_formatted_oid.q | 18 +
.../test/queries/clientpositive/input_part10.q | 3 -
.../queries/clientpositive/input_part10_win.q | 23 -
.../test/queries/clientpositive/interval_alt.q | 19 +-
.../clientpositive/interval_arithmetic.q | 3 +
ql/src/test/queries/clientpositive/join31.q | 2 +-
.../queries/clientpositive/join_max_hashtable.q | 37 +
.../clientpositive/lateral_view_explode2.q | 4 +-
.../test/queries/clientpositive/llap_reader.q | 40 +
.../queries/clientpositive/load_dyn_part14.q | 3 -
.../clientpositive/load_dyn_part14_win.q | 38 -
ql/src/test/queries/clientpositive/masking_10.q | 25 +
.../clientpositive/msck_repair_batchsize.q | 10 +
.../test/queries/clientpositive/multiMapJoin2.q | 2 +-
ql/src/test/queries/clientpositive/parenthese.q | 80 +
.../clientpositive/partition_timestamp.q | 2 -
.../clientpositive/partition_timestamp2.q | 2 -
.../clientpositive/partition_timestamp2_win.q | 58 -
.../clientpositive/partition_timestamp_win.q | 59 -
.../clientpositive/position_alias_test_1.q | 18 +
.../test/queries/clientpositive/scriptfile1.q | 2 -
.../queries/clientpositive/scriptfile1_win.q | 16 -
.../spark_use_file_size_for_mapjoin.q | 30 +
.../queries/clientpositive/spark_use_op_stats.q | 41 +
ql/src/test/queries/clientpositive/sqlmerge.q | 6 +
.../queries/clientpositive/subquery_multi.q | 8 +-
.../queries/clientpositive/transform_acid.q | 2 -
.../clientpositive/udaf_percentile_approx_23.q | 3 +
ql/src/test/queries/clientpositive/udf_in.q | 4 +-
ql/src/test/queries/clientpositive/udf_notop.q | 2 +-
.../queries/clientpositive/union_pos_alias.q | 30 +
.../vector_partitioned_date_time.q | 3 -
.../vector_partitioned_date_time_win.q | 129 -
.../vectorized_dynamic_semijoin_reduction.q | 6 +
.../vectorized_dynamic_semijoin_reduction2.q | 65 +
ql/src/test/queries/clientpositive/view_cbo.q | 72 +
.../clientnegative/char_pad_convert_fail2.q.out | 2 +-
.../create_or_replace_view4.q.out | 2 +-
.../clientnegative/create_view_failure3.q.out | 2 +-
.../clientnegative/create_view_failure6.q.out | 2 +-
.../clientnegative/create_view_failure7.q.out | 2 +-
.../clientnegative/create_view_failure8.q.out | 2 +-
.../clientnegative/create_view_failure9.q.out | 2 +-
.../invalid_select_expression.q.out | 2 +-
.../clientnegative/masking_acid_delete.q.out | 13 +
.../clientnegative/masking_acid_merge.q.out | 21 +
.../clientnegative/masking_acid_update.q.out | 13 +
.../ptf_negative_DistributeByOrderBy.q.out | 2 +-
.../ptf_negative_PartitionBySortBy.q.out | 2 +-
.../clientnegative/ptf_window_boundaries.q.out | 2 +-
.../clientnegative/ptf_window_boundaries2.q.out | 2 +-
.../selectDistinctStarNeg_1.q.out | 2 +-
.../clientnegative/select_charliteral.q.out | 3 +-
.../clientnegative/subquery_corr_in_agg.q.out | 39 +
.../clientnegative/subquery_missing_from.q.out | 2 +-
.../subquery_subquery_chain.q.out | 2 +-
.../clientnegative/subquery_with_or_cond.q.out | 2 +-
ql/src/test/results/clientnegative/udf_in.q.out | 2 +-
.../test/results/clientnegative/udf_in_2.q.out | 1 +
.../clientpositive/acid_globallimit.q.out | 52 -
.../avro_timestamp_win.q.java1.7.out | 134 -
.../avro_timestamp_win.q.java1.8.out | 134 -
.../results/clientpositive/combine2_win.q.out | 767 ----
.../clientpositive/constant_prop_1.q.out | 4 +-
.../clientpositive/constprog_partitioner.q.out | 117 +-
.../results/clientpositive/create_view.q.out | 10 +-
.../clientpositive/create_view_translate.q.out | 4 +-
ql/src/test/results/clientpositive/cte_2.q.out | 4 +-
ql/src/test/results/clientpositive/cte_4.q.out | 4 +-
...on_join_with_different_encryption_keys.q.out | 14 +-
.../results/clientpositive/explain_ddl.q.out | 4 +-
.../clientpositive/explain_dependency.q.out | 2 +-
.../clientpositive/explain_formatted_oid.q.out | 38 +
.../clientpositive/explain_logical.q.out | 2 +-
ql/src/test/results/clientpositive/input4.q.out | 2 +-
.../clientpositive/input_part10_win.q.out | 131 -
.../results/clientpositive/interval_alt.q.out | 79 +-
.../clientpositive/interval_arithmetic.q.out | 35 +
ql/src/test/results/clientpositive/join0.q.out | 2 +-
.../results/clientpositive/llap/cbo_views.q.out | 2 +-
.../results/clientpositive/llap/cte_2.q.out | 4 +-
.../results/clientpositive/llap/cte_4.q.out | 4 +-
.../llap/dynamic_semijoin_reduction_2.q.out | 301 ++
.../clientpositive/llap/explainuser_1.q.out | 688 +--
.../llap/join_max_hashtable.q.out | 490 ++
.../results/clientpositive/llap/lineage3.q.out | 2 +-
.../clientpositive/llap/llap_reader.q.out | 167 +
.../results/clientpositive/llap/mergejoin.q.out | 8 +-
.../llap/metadata_only_queries.q.out | 4 +-
...chema_evol_text_vec_part_all_primitive.q.out | 34 +-
.../llap/selectDistinctStar.q.out | 8 +-
.../results/clientpositive/llap/sqlmerge.q.out | 101 +
.../clientpositive/llap/subquery_exists.q.out | 169 +-
.../clientpositive/llap/subquery_in.q.out | 2838 +++---------
.../clientpositive/llap/subquery_multi.q.out | 2788 +++---------
.../clientpositive/llap/subquery_notin.q.out | 4222 +++++-------------
.../clientpositive/llap/subquery_scalar.q.out | 2679 +++--------
.../clientpositive/llap/subquery_views.q.out | 558 +--
.../clientpositive/llap/union_top_level.q.out | 4 +-
.../clientpositive/llap/vector_between_in.q.out | 22 +-
.../llap/vector_decimal_mapjoin.q.out | 4 +-
.../llap/vector_mapjoin_reduce.q.out | 196 +-
.../llap/vector_reduce_groupby_decimal.q.out | 2 +-
.../vectorized_dynamic_semijoin_reduction.q.out | 141 +
...vectorized_dynamic_semijoin_reduction2.q.out | 1192 +++++
.../results/clientpositive/llap_reader.q.out | 86 +
.../clientpositive/load_dyn_part14_win.q.out | 298 --
.../results/clientpositive/masking_10.q.out | 244 +
.../clientpositive/metadata_only_queries.q.out | 4 +-
ql/src/test/results/clientpositive/mm_all.q.out | 10 +-
.../clientpositive/msck_repair_batchsize.q.out | 25 +
.../results/clientpositive/parallel_join0.q.out | 2 +-
.../results/clientpositive/parenthese.q.out | 168 +
.../partition_timestamp2_win.q.out | 399 --
.../partition_timestamp_win.q.out | 316 --
.../results/clientpositive/perf/query1.q.out | 190 +-
.../results/clientpositive/perf/query14.q.out | 8 +-
.../results/clientpositive/perf/query16.q.out | 222 +-
.../results/clientpositive/perf/query23.q.out | 4 +-
.../results/clientpositive/perf/query30.q.out | 375 +-
.../results/clientpositive/perf/query6.q.out | 403 +-
.../results/clientpositive/perf/query69.q.out | 499 +--
.../results/clientpositive/perf/query81.q.out | 375 +-
.../test/results/clientpositive/plan_json.q.out | 2 +-
.../clientpositive/position_alias_test_1.q.out | 148 +
.../clientpositive/scriptfile1_win.q.out | 55 -
.../test/results/clientpositive/semijoin5.q.out | 138 +-
.../spark/constprog_partitioner.q.out | 87 +-
.../spark/metadata_only_queries.q.out | 4 +-
.../spark/spark_use_file_size_for_mapjoin.q.out | 257 ++
.../spark/spark_use_op_stats.q.out | 331 ++
.../clientpositive/spark/subquery_exists.q.out | 167 +-
.../clientpositive/spark/subquery_in.q.out | 2597 +++--------
.../clientpositive/spark/union_remove_25.q.out | 2 +-
.../clientpositive/spark/union_top_level.q.out | 4 +-
.../spark/vector_between_in.q.out | 22 +-
.../spark/vector_decimal_mapjoin.q.out | 4 +-
.../spark/vector_mapjoin_reduce.q.out | 216 +-
.../clientpositive/subquery_exists.q.out | 221 +-
.../clientpositive/subquery_exists_having.q.out | 232 +-
.../clientpositive/subquery_in_having.q.out | 641 +--
.../clientpositive/subquery_notexists.q.out | 209 +-
.../subquery_notexists_having.q.out | 223 +-
.../clientpositive/subquery_notin_having.q.out | 644 +--
.../subquery_unqualcolumnrefs.q.out | 687 +--
.../clientpositive/tez/explainanalyze_3.q.out | 4 +-
.../clientpositive/tez/explainuser_3.q.out | 4 +-
.../udaf_percentile_approx_23.q.out | 9 +
.../results/clientpositive/udf_between.q.out | 2 +-
ql/src/test/results/clientpositive/udf_in.q.out | 4 +-
.../test/results/clientpositive/udf_lpad.q.out | 6 +-
.../test/results/clientpositive/udf_notop.q.out | 4 +-
.../test/results/clientpositive/udf_rpad.q.out | 6 +-
.../clientpositive/union_pos_alias.q.out | 308 ++
.../clientpositive/vector_decimal_mapjoin.q.out | 2 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 322 +-
.../clientpositive/vector_outer_join3.q.out | 6 +-
.../clientpositive/vector_outer_join4.q.out | 6 +-
.../clientpositive/vector_outer_join6.q.out | 4 +-
.../vector_partitioned_date_time_win.q.out | 2036 ---------
.../vector_reduce_groupby_decimal.q.out | 2 +-
.../results/clientpositive/view_alias.q.out | 12 +-
.../test/results/clientpositive/view_cbo.q.out | 823 ++++
.../lazy/fast/LazySimpleDeserializeRead.java | 2 +-
.../gen/thrift/gen-cpp/TCLIService_types.cpp | 8 +-
.../src/gen/thrift/gen-cpp/TCLIService_types.h | 11 +-
.../service/rpc/thrift/TOpenSessionReq.java | 4 +-
.../service/rpc/thrift/TOpenSessionResp.java | 4 +-
.../service/rpc/thrift/TProtocolVersion.java | 5 +-
service-rpc/src/gen/thrift/gen-php/Types.php | 6 +-
.../src/gen/thrift/gen-py/TCLIService/ttypes.py | 7 +-
.../gen/thrift/gen-rb/t_c_l_i_service_types.rb | 9 +-
.../org/apache/hive/service/cli/CLIService.java | 45 +-
.../hive/service/cli/operation/Operation.java | 16 +-
.../service/cli/operation/SQLOperation.java | 4 +-
.../service/cli/thrift/ThriftCLIService.java | 24 +-
.../cli/thrift/ThriftHttpCLIService.java | 2 +-
.../apache/hadoop/fs/ProxyLocalFileSystem.java | 8 -
.../org/apache/hive/spark/client/rpc/Rpc.java | 2 +-
.../common/io/encoded/EncodedColumnBatch.java | 1 +
.../hive/ql/exec/vector/BytesColumnVector.java | 11 +
.../hive/ql/exec/vector/ColumnVector.java | 12 +
.../ql/exec/vector/DecimalColumnVector.java | 11 +-
.../hive/ql/exec/vector/DoubleColumnVector.java | 8 +-
.../vector/IntervalDayTimeColumnVector.java | 8 +
.../hive/ql/exec/vector/LongColumnVector.java | 8 +-
.../ql/exec/vector/MultiValuedColumnVector.java | 4 +
.../hive/ql/exec/vector/StructColumnVector.java | 5 +
.../ql/exec/vector/TimestampColumnVector.java | 8 +
.../hive/ql/exec/vector/UnionColumnVector.java | 5 +
.../hive/serde2/io/HiveDecimalWritable.java | 5 +-
testutils/hadoop.cmd | 252 --
vector-code-gen/pom.xml | 69 +
.../apache/hadoop/hive/tools/GenVectorCode.java | 3327 ++++++++++++++
.../hadoop/hive/tools/GenVectorTestCode.java | 261 ++
496 files changed, 23684 insertions(+), 31973 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/if/hive_metastore.thrift
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/derby/hive-schema-2.2.0.derby.sql
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/derby/upgrade-2.1.0-to-2.2.0.derby.sql
----------------------------------------------------------------------
diff --cc metastore/scripts/upgrade/derby/upgrade-2.1.0-to-2.2.0.derby.sql
index f59b37f,e5a144c..cbf5be1
--- a/metastore/scripts/upgrade/derby/upgrade-2.1.0-to-2.2.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-2.1.0-to-2.2.0.derby.sql
@@@ -1,6 -1,5 +1,7 @@@
-- Upgrade MetaStore schema from 2.1.0 to 2.2.0
RUN '037-HIVE-14496.derby.sql';
+ RUN '038-HIVE-10562.derby.sql';
+RUN '037-HIVE-14637.derby.sql';
+
UPDATE "APP".VERSION SET SCHEMA_VERSION='2.2.0', VERSION_COMMENT='Hive release version 2.2.0' where VER_ID=1;
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/mssql/hive-schema-2.2.0.mssql.sql
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/mssql/upgrade-2.1.0-to-2.2.0.mssql.sql
----------------------------------------------------------------------
diff --cc metastore/scripts/upgrade/mssql/upgrade-2.1.0-to-2.2.0.mssql.sql
index 2dcdedc,a4b8fda..99024c2
--- a/metastore/scripts/upgrade/mssql/upgrade-2.1.0-to-2.2.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-2.1.0-to-2.2.0.mssql.sql
@@@ -1,7 -1,7 +1,8 @@@
SELECT 'Upgrading MetaStore schema from 2.1.0 to 2.2.0' AS MESSAGE;
:r 022-HIVE-14496.mssql.sql
+:r 023-HIVE-14637.mssql.sql
+ :r 023-HIVE-10562.mssql.sql
UPDATE VERSION SET SCHEMA_VERSION='2.2.0', VERSION_COMMENT='Hive release version 2.2.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 2.1.0 to 2.2.0' AS MESSAGE;
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/mysql/hive-schema-2.2.0.mysql.sql
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/mysql/upgrade-2.1.0-to-2.2.0.mysql.sql
----------------------------------------------------------------------
diff --cc metastore/scripts/upgrade/mysql/upgrade-2.1.0-to-2.2.0.mysql.sql
index afe17f9,509c532..68300d3
--- a/metastore/scripts/upgrade/mysql/upgrade-2.1.0-to-2.2.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-2.1.0-to-2.2.0.mysql.sql
@@@ -1,7 -1,7 +1,8 @@@
SELECT 'Upgrading MetaStore schema from 2.1.0 to 2.2.0' AS ' ';
SOURCE 037-HIVE-14496.mysql.sql;
+SOURCE 038-HIVE-14637.mysql.sql;
+ SOURCE 038-HIVE-10562.mysql.sql;
UPDATE VERSION SET SCHEMA_VERSION='2.2.0', VERSION_COMMENT='Hive release version 2.2.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 2.1.0 to 2.2.0' AS ' ';
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/oracle/hive-schema-2.2.0.oracle.sql
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/oracle/upgrade-2.1.0-to-2.2.0.oracle.sql
----------------------------------------------------------------------
diff --cc metastore/scripts/upgrade/oracle/upgrade-2.1.0-to-2.2.0.oracle.sql
index dd1b97c,f31fda9..058c0d5
--- a/metastore/scripts/upgrade/oracle/upgrade-2.1.0-to-2.2.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-2.1.0-to-2.2.0.oracle.sql
@@@ -1,7 -1,7 +1,8 @@@
SELECT 'Upgrading MetaStore schema from 2.1.0 to 2.2.0' AS Status from dual;
@037-HIVE-14496.oracle.sql;
+@038-HIVE-14637.oracle.sql;
+ @038-HIVE-10562.oracle.sql;
UPDATE VERSION SET SCHEMA_VERSION='2.2.0', VERSION_COMMENT='Hive release version 2.2.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 2.1.0 to 2.2.0' AS Status from dual;
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/postgres/hive-schema-2.2.0.postgres.sql
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/scripts/upgrade/postgres/upgrade-2.1.0-to-2.2.0.postgres.sql
----------------------------------------------------------------------
diff --cc metastore/scripts/upgrade/postgres/upgrade-2.1.0-to-2.2.0.postgres.sql
index a316383,0f64a90..ae4adf7
--- a/metastore/scripts/upgrade/postgres/upgrade-2.1.0-to-2.2.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-2.1.0-to-2.2.0.postgres.sql
@@@ -1,7 -1,7 +1,8 @@@
SELECT 'Upgrading MetaStore schema from 2.1.0 to 2.2.0';
\i 036-HIVE-14496.postgres.sql;
+\i 037-HIVE-14637.postgres.sql;
+ \i 037-HIVE-10562.postgres.sql;
UPDATE "VERSION" SET "SCHEMA_VERSION"='2.2.0', "VERSION_COMMENT"='Hive release version 2.2.0' where "VER_ID"=1;
SELECT 'Finished upgrading MetaStore schema from 2.1.0 to 2.2.0';
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/gen/thrift/gen-php/metastore/Types.php
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/metastore/src/model/package.jdo
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index d7db991,3484493..966b2b5
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@@ -200,9 -141,9 +200,10 @@@ import org.apache.hadoop.mapred.RecordR
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Shell;
+ import org.apache.hive.common.util.ACLConfigurationParser;
import org.apache.hive.common.util.ReflectionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@@ -3966,331 -3796,25 +3951,352 @@@ public final class Utilities
return String.format("%.2f%sB", bytes / Math.pow(unit, exp), suffix);
}
+ private static final String MANIFEST_EXTENSION = ".manifest";
+
+ private static void tryDelete(FileSystem fs, Path path) {
+ try {
+ fs.delete(path, true);
+ } catch (IOException ex) {
+ LOG.error("Failed to delete " + path, ex);
+ }
+ }
+
+ public static Path[] getMmDirectoryCandidates(FileSystem fs, Path path, int dpLevels,
+ int lbLevels, PathFilter filter, long mmWriteId, Configuration conf) throws IOException {
+ int skipLevels = dpLevels + lbLevels;
+ if (filter == null) {
+ filter = new ValidWriteIds.IdPathFilter(mmWriteId, true);
+ }
+ if (skipLevels == 0) {
+ return statusToPath(fs.listStatus(path, filter));
+ }
+ if (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs)) {
+ return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter);
+ }
+ return getMmDirectoryCandidatesGlobStatus(fs, path, skipLevels, filter, mmWriteId);
+ }
+
+ private static boolean isS3(FileSystem fs) {
+ try {
+ return fs.getScheme().equalsIgnoreCase("s3a");
+ } catch (UnsupportedOperationException ex) {
+ // Some FS-es do not implement getScheme, e.g. ProxyLocalFileSystem.
+ return false;
+ }
+ }
+
+ private static Path[] statusToPath(FileStatus[] statuses) {
+ if (statuses == null) return null;
+ Path[] paths = new Path[statuses.length];
+ for (int i = 0; i < statuses.length; ++i) {
+ paths[i] = statuses[i].getPath();
+ }
+ return paths;
+ }
+
+ private static Path[] getMmDirectoryCandidatesRecursive(FileSystem fs,
+ Path path, int skipLevels, PathFilter filter) throws IOException {
+ String lastRelDir = null;
+ HashSet<Path> results = new HashSet<Path>();
+ String relRoot = Path.getPathWithoutSchemeAndAuthority(path).toString();
+ if (!relRoot.endsWith(Path.SEPARATOR)) {
+ relRoot += Path.SEPARATOR;
+ }
+ RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(path, true);
+ while (allFiles.hasNext()) {
+ LocatedFileStatus lfs = allFiles.next();
+ Path dirPath = Path.getPathWithoutSchemeAndAuthority(lfs.getPath());
+ String dir = dirPath.toString();
+ if (!dir.startsWith(relRoot)) {
+ throw new IOException("Path " + lfs.getPath() + " is not under " + relRoot
+ + " (when shortened to " + dir + ")");
+ }
+ String subDir = dir.substring(relRoot.length());
+ Utilities.LOG14535.info("Looking at " + subDir + " from " + lfs.getPath());
+ // If sorted, we'll skip a bunch of files.
+ if (lastRelDir != null && subDir.startsWith(lastRelDir)) continue;
+ int startIx = skipLevels > 0 ? -1 : 0;
+ for (int i = 0; i < skipLevels; ++i) {
+ startIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1);
+ if (startIx == -1) {
+ Utilities.LOG14535.info("Expected level of nesting (" + skipLevels + ") is not "
+ + " present in " + subDir + " (from " + lfs.getPath() + ")");
+ break;
+ }
+ }
+ if (startIx == -1) continue;
+ int endIx = subDir.indexOf(Path.SEPARATOR_CHAR, startIx + 1);
+ if (endIx == -1) {
+ Utilities.LOG14535.info("Expected level of nesting (" + (skipLevels + 1) + ") is not "
+ + " present in " + subDir + " (from " + lfs.getPath() + ")");
+ continue;
+ }
+ lastRelDir = subDir = subDir.substring(0, endIx);
+ Path candidate = new Path(relRoot, subDir);
+ Utilities.LOG14535.info("Considering MM directory candidate " + candidate);
+ if (!filter.accept(candidate)) continue;
+ results.add(fs.makeQualified(candidate));
+ }
+ return results.toArray(new Path[results.size()]);
+ }
+
+ private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs,
+ Path path, int skipLevels, PathFilter filter, long mmWriteId) throws IOException {
+ StringBuilder sb = new StringBuilder(path.toUri().getPath());
+ for (int i = 0; i < skipLevels; i++) {
+ sb.append(Path.SEPARATOR).append("*");
+ }
+ sb.append(Path.SEPARATOR).append(ValidWriteIds.getMmFilePrefix(mmWriteId));
+ Path pathPattern = new Path(path, sb.toString());
+ Utilities.LOG14535.info("Looking for files via: " + pathPattern);
+ return statusToPath(fs.globStatus(pathPattern, filter));
+ }
+
+ private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir,
+ int dpLevels, int lbLevels, String unionSuffix, ValidWriteIds.IdPathFilter filter,
+ long mmWriteId, Configuration conf) throws IOException {
+ Path[] files = getMmDirectoryCandidates(
+ fs, specPath, dpLevels, lbLevels, filter, mmWriteId, conf);
+ if (files != null) {
+ for (Path path : files) {
+ Utilities.LOG14535.info("Deleting " + path + " on failure");
+ tryDelete(fs, path);
+ }
+ }
+ Utilities.LOG14535.info("Deleting " + manifestDir + " on failure");
+ fs.delete(manifestDir, true);
+ }
+
+
+ public static void writeMmCommitManifest(List<Path> commitPaths, Path specPath, FileSystem fs,
+ String taskId, Long mmWriteId, String unionSuffix) throws HiveException {
+ if (commitPaths.isEmpty()) return;
+ // We assume one FSOP per task (per specPath), so we create it in specPath.
+ Path manifestPath = getManifestDir(specPath, mmWriteId, unionSuffix);
+ manifestPath = new Path(manifestPath, taskId + MANIFEST_EXTENSION);
+ Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths);
+ try {
+ // Don't overwrite the manifest... should fail if we have collisions.
+ try (FSDataOutputStream out = fs.create(manifestPath, false)) {
+ if (out == null) {
+ throw new HiveException("Failed to create manifest at " + manifestPath);
+ }
+ out.writeInt(commitPaths.size());
+ for (Path path : commitPaths) {
+ out.writeUTF(path.toString());
+ }
+ }
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ }
+
+ private static Path getManifestDir(Path specPath, long mmWriteId, String unionSuffix) {
+ Path manifestPath = new Path(specPath, "_tmp." + ValidWriteIds.getMmFilePrefix(mmWriteId));
+ return (unionSuffix == null) ? manifestPath : new Path(manifestPath, unionSuffix);
+ }
+
+ public static final class MissingBucketsContext {
+ public final TableDesc tableInfo;
+ public final int numBuckets;
+ public final boolean isCompressed;
+ public MissingBucketsContext(TableDesc tableInfo, int numBuckets, boolean isCompressed) {
+ this.tableInfo = tableInfo;
+ this.numBuckets = numBuckets;
+ this.isCompressed = isCompressed;
+ }
+ }
+
+ public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Configuration hconf,
+ boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long mmWriteId,
+ Reporter reporter, boolean isMmCtas) throws IOException, HiveException {
+ FileSystem fs = specPath.getFileSystem(hconf);
+ Path manifestDir = getManifestDir(specPath, mmWriteId, unionSuffix);
+ if (!success) {
+ ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(mmWriteId, true);
+ tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels,
+ unionSuffix, filter, mmWriteId, hconf);
+ return;
+ }
+
+ Utilities.LOG14535.info("Looking for manifests in: " + manifestDir + " (" + mmWriteId + ")");
+ // TODO# may be wrong if there are no splits (empty insert/CTAS)
+ List<Path> manifests = new ArrayList<>();
+ if (fs.exists(manifestDir)) {
+ FileStatus[] manifestFiles = fs.listStatus(manifestDir);
+ if (manifestFiles != null) {
+ for (FileStatus status : manifestFiles) {
+ Path path = status.getPath();
+ if (path.getName().endsWith(MANIFEST_EXTENSION)) {
+ Utilities.LOG14535.info("Reading manifest " + path);
+ manifests.add(path);
+ }
+ }
+ }
+ } else {
+ Utilities.LOG14535.info("No manifests found - query produced no output");
+ manifestDir = null;
+ }
+
+ Utilities.LOG14535.info("Looking for files in: " + specPath);
+ ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(mmWriteId, true);
+ if (isMmCtas && !fs.exists(specPath)) {
+ // TODO: do we also need to do this when creating an empty partition from select?
+ Utilities.LOG14535.info("Creating table directory for CTAS with no output at " + specPath);
+ FileUtils.mkdir(fs, specPath, hconf);
+ }
+ Path[] files = getMmDirectoryCandidates(
+ fs, specPath, dpLevels, lbLevels, filter, mmWriteId, hconf);
+ ArrayList<Path> mmDirectories = new ArrayList<>();
+ if (files != null) {
+ for (Path path : files) {
+ Utilities.LOG14535.info("Looking at path: " + path);
+ mmDirectories.add(path);
+ }
+ }
+
+ HashSet<String> committed = new HashSet<>();
+ for (Path mfp : manifests) {
+ try (FSDataInputStream mdis = fs.open(mfp)) {
+ int fileCount = mdis.readInt();
+ for (int i = 0; i < fileCount; ++i) {
+ String nextFile = mdis.readUTF();
+ if (!committed.add(nextFile)) {
+ throw new HiveException(nextFile + " was specified in multiple manifests");
+ }
+ }
+ }
+ }
+
+ if (manifestDir != null) {
+ Utilities.LOG14535.info("Deleting manifest directory " + manifestDir);
+ tryDelete(fs, manifestDir);
+ if (unionSuffix != null) {
+ // Also delete the parent directory if we are the last union FSOP to execute.
+ manifestDir = manifestDir.getParent();
+ FileStatus[] remainingFiles = fs.listStatus(manifestDir);
+ if (remainingFiles == null || remainingFiles.length == 0) {
+ Utilities.LOG14535.info("Deleting manifest directory " + manifestDir);
+ tryDelete(fs, manifestDir);
+ }
+ }
+ }
+
+ for (Path path : mmDirectories) {
+ cleanMmDirectory(path, fs, unionSuffix, committed);
+ }
+
+ if (!committed.isEmpty()) {
+ throw new HiveException("The following files were committed but not found: " + committed);
+ }
+
+ if (mmDirectories.isEmpty()) return;
+
+ // TODO: see HIVE-14886 - removeTempOrDuplicateFiles is broken for list bucketing,
+ // so maintain parity here by not calling it at all.
+ if (lbLevels != 0) return;
+ // Create fake file statuses to avoid querying the file system. removeTempOrDuplicateFiles
+ // doesn't need to check anything except path and directory status for MM directories.
+ FileStatus[] finalResults = new FileStatus[mmDirectories.size()];
+ for (int i = 0; i < mmDirectories.size(); ++i) {
+ finalResults[i] = new PathOnlyFileStatus(mmDirectories.get(i));
+ }
+ List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles(
+ fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, mmWriteId);
+ // create empty buckets if necessary
+ if (emptyBuckets.size() > 0) {
+ assert mbc != null;
+ Utilities.createEmptyBuckets(hconf, emptyBuckets, mbc.isCompressed, mbc.tableInfo, reporter);
+ }
+ }
+
+ private static final class PathOnlyFileStatus extends FileStatus {
+ public PathOnlyFileStatus(Path path) {
+ super(0, true, 0, 0, 0, path);
+ }
+ }
+
+ private static void cleanMmDirectory(Path dir, FileSystem fs,
+ String unionSuffix, HashSet<String> committed) throws IOException, HiveException {
+ for (FileStatus child : fs.listStatus(dir)) {
+ Path childPath = child.getPath();
+ if (unionSuffix == null) {
+ if (committed.remove(childPath.toString())) continue; // A good file.
+ deleteUncommitedFile(childPath, fs);
+ } else if (!child.isDirectory()) {
+ if (committed.contains(childPath.toString())) {
+ throw new HiveException("Union FSOP has commited "
+ + childPath + " outside of union directory" + unionSuffix);
+ }
+ deleteUncommitedFile(childPath, fs);
+ } else if (childPath.getName().equals(unionSuffix)) {
+ // Found the right union directory; treat it as "our" MM directory.
+ cleanMmDirectory(childPath, fs, null, committed);
+ } else {
+ Utilities.LOG14535.info("FSOP for " + unionSuffix
+ + " is ignoring the other side of the union " + childPath.getName());
+ }
+ }
+ }
+
+ private static void deleteUncommitedFile(Path childPath, FileSystem fs)
+ throws IOException, HiveException {
+ Utilities.LOG14535.info("Deleting " + childPath + " that was not committed");
+ // We should actually succeed here - if we fail, don't commit the query.
+ if (!fs.delete(childPath, true)) {
+ throw new HiveException("Failed to delete an uncommitted path " + childPath);
+ }
+ }
+
+ /**
+ * @return the complete list of valid MM directories under a table/partition path; null
+ * if the entire directory is valid (has no uncommitted/temporary files).
+ */
+ public static List<Path> getValidMmDirectoriesFromTableOrPart(Path path, Configuration conf,
+ ValidWriteIds ids, int lbLevels) throws IOException {
+ Utilities.LOG14535.info("Looking for valid MM paths under " + path);
+ // NULL means this directory is entirely valid.
+ List<Path> result = null;
+ FileSystem fs = path.getFileSystem(conf);
+ FileStatus[] children = (lbLevels == 0) ? fs.listStatus(path)
+ : fs.globStatus(new Path(path, StringUtils.repeat("*" + Path.SEPARATOR, lbLevels) + "*"));
+ for (int i = 0; i < children.length; ++i) {
+ FileStatus file = children[i];
+ Path childPath = file.getPath();
+ Long writeId = ValidWriteIds.extractWriteId(childPath);
+ if (!file.isDirectory() || writeId == null || !ids.isValid(writeId)) {
+ Utilities.LOG14535.info("Skipping path " + childPath);
+ if (result == null) {
+ result = new ArrayList<>(children.length - 1);
+ for (int j = 0; j < i; ++j) {
+ result.add(children[j].getPath());
+ }
+ }
+ } else if (result != null) {
+ result.add(childPath);
+ }
+ }
+ return result;
+ }
+
+ public static String getAclStringWithHiveModification(Configuration tezConf,
+ String propertyName,
+ boolean addHs2User,
+ String user,
+ String hs2User) throws
+ IOException {
+
+ // Start with initial ACLs
+ ACLConfigurationParser aclConf =
+ new ACLConfigurationParser(tezConf, propertyName);
+
+ // Always give access to the user
+ aclConf.addAllowedUser(user);
+
+ // Give access to the process user if the config is set.
+ if (addHs2User && hs2User != null) {
+ aclConf.addAllowedUser(hs2User);
+ }
+ return aclConf.toAclString();
+ }
}
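The manifest format written by writeMmCommitManifest above is deliberately simple: an int count followed by one writeUTF-encoded path per committed file. As a hedged sketch, a reader that mirrors that layout and enforces the same no-duplicates rule handleMmTableFinalPath applies could look like this; readManifest is a hypothetical helper for illustration, not part of the patch.

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    final class ManifestReader {
      static Set<String> readManifest(FileSystem fs, Path manifestPath) throws IOException {
        Set<String> committed = new HashSet<>();
        try (FSDataInputStream in = fs.open(manifestPath)) {
          int fileCount = in.readInt();       // mirrors out.writeInt(commitPaths.size())
          for (int i = 0; i < fileCount; ++i) {
            String next = in.readUTF();       // mirrors out.writeUTF(path.toString())
            if (!committed.add(next)) {       // same check handleMmTableFinalPath performs
              throw new IOException(next + " was specified in multiple manifests");
            }
          }
        }
        return committed;
      }
    }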
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2014ece9/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
----------------------------------------------------------------------
[02/50] [abbrv] hive git commit: HIVE-15900 : Beeline prints tez job progress in stdout instead of stderr (Thejas Nair, reviewed by Daniel Dai, Anishek Agarwal)
Posted by se...@apache.org.
HIVE-15900 : Beeline prints tez job progress in stdout instead of stderr (Thejas Nair, reviewed by Daniel Dai, Anishek Agarwal)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/60a36d12
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/60a36d12
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/60a36d12
Branch: refs/heads/hive-14535
Commit: 60a36d124d437bea123c94d50ccb171e24ff2f3d
Parents: 6e652a3
Author: Thejas M Nair <th...@hortonworks.com>
Authored: Thu Feb 16 11:43:45 2017 -0800
Committer: Thejas M Nair <th...@hortonworks.com>
Committed: Thu Feb 16 11:43:52 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/hive/beeline/Commands.java | 2 +-
.../org/apache/hive/jdbc/miniHS2/MiniHS2.java | 2 +
.../hive/beeline/TestBeeLineWithArgs.java | 169 +++++++++++++------
.../TestOperationLoggingAPIWithTez.java | 2 -
4 files changed, 118 insertions(+), 57 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/60a36d12/beeline/src/java/org/apache/hive/beeline/Commands.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/Commands.java b/beeline/src/java/org/apache/hive/beeline/Commands.java
index 99db643..6a3ad42 100644
--- a/beeline/src/java/org/apache/hive/beeline/Commands.java
+++ b/beeline/src/java/org/apache/hive/beeline/Commands.java
@@ -985,7 +985,7 @@ public class Commands {
logThread.start();
if (stmnt instanceof HiveStatement) {
((HiveStatement) stmnt).setInPlaceUpdateStream(
- new BeelineInPlaceUpdateStream(beeLine.getOutputStream())
+ new BeelineInPlaceUpdateStream(beeLine.getErrorStream())
);
}
hasResults = stmnt.execute(sql);
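The one-line Commands.java change above reroutes Tez in-place progress from BeeLine's output stream to its error stream, so stdout carries only query results and stays safe to pipe or redirect. A minimal, hedged illustration of the resulting split (the main() wrapper here is hypothetical; setOutputStream, setErrorStream, and begin are the public BeeLine entry points exercised by the test changes later in this commit):

    import java.io.PrintStream;
    import org.apache.hive.beeline.BeeLine;

    public class StreamSplitExample {
      public static void main(String[] args) throws Exception {
        BeeLine beeLine = new BeeLine();
        beeLine.setOutputStream(new PrintStream(System.out, true, "UTF8")); // query rows only
        beeLine.setErrorStream(new PrintStream(System.err, true, "UTF8"));  // progress + diagnostics
        // With BeelineInPlaceUpdateStream attached to the error stream,
        // `beeline -f query.sql > rows.txt` captures results without progress bars.
        beeLine.begin(args, null);
      }
    }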
http://git-wip-us.apache.org/repos/asf/hive/blob/60a36d12/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java b/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
index e641253..71f9640 100644
--- a/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
+++ b/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
@@ -222,6 +222,8 @@ public class MiniHS2 extends AbstractHiveService {
// Initialize the execution engine based on cluster type
switch (miniClusterType) {
case TEZ:
+ // Change the engine to tez
+ hiveConf.setVar(ConfVars.HIVE_EXECUTION_ENGINE, "tez");
// TODO: This should be making use of confDir to load configs setup for Tez, etc.
mr = ShimLoader.getHadoopShims().getMiniTezCluster(hiveConf, 2, uriString, false);
break;
http://git-wip-us.apache.org/repos/asf/hive/blob/60a36d12/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
index 9e99a91..8fe3789 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
@@ -36,11 +36,16 @@ import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hive.jdbc.Utils;
import org.apache.hive.jdbc.miniHS2.MiniHS2;
+import org.apache.hive.jdbc.miniHS2.MiniHS2.MiniClusterType;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
@@ -51,11 +56,15 @@ import org.junit.Test;
*
*/
public class TestBeeLineWithArgs {
+ private enum OutStream {
+ ERR, OUT
+ };
+
// Default location of HiveServer2
private static final String tableName = "TestBeelineTable1";
private static final String tableComment = "Test table comment";
-
private static MiniHS2 miniHS2;
+ private static final String userName = System.getProperty("user.name");
private List<String> getBaseArgs(String jdbcUrl) {
List<String> argList = new ArrayList<String>(8);
@@ -63,6 +72,8 @@ public class TestBeeLineWithArgs {
argList.add(BeeLine.BEELINE_DEFAULT_JDBC_DRIVER);
argList.add("-u");
argList.add(jdbcUrl);
+ argList.add("-n");
+ argList.add(userName);
return argList;
}
/**
@@ -71,11 +82,15 @@ public class TestBeeLineWithArgs {
@BeforeClass
public static void preTests() throws Exception {
HiveConf hiveConf = new HiveConf();
- // Set to non-zk lock manager to prevent HS2 from trying to connect
- hiveConf.setVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER, "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
+ hiveConf.setVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER,
+ "org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
- miniHS2 = new MiniHS2(hiveConf);
- miniHS2.start(new HashMap<String, String>());
+ hiveConf.set(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL.varname, "verbose");
+ miniHS2 = new MiniHS2(hiveConf, MiniClusterType.TEZ);
+
+ Map<String, String> confOverlay = new HashMap<String, String>();
+ miniHS2.start(confOverlay);
+
createTable();
}
@@ -86,7 +101,8 @@ public class TestBeeLineWithArgs {
*/
private static void createTable() throws ClassNotFoundException, SQLException {
Class.forName(BeeLine.BEELINE_DEFAULT_JDBC_DRIVER);
- Connection con = DriverManager.getConnection(miniHS2.getBaseJdbcURL(),"", "");
+ Connection con = DriverManager.getConnection(miniHS2.getBaseJdbcURL(),
+ userName , "");
assertNotNull("Connection is null", con);
assertFalse("Connection should not be closed", con.isClosed());
@@ -128,16 +144,27 @@ public class TestBeeLineWithArgs {
/**
* Execute a script with "beeline -f or -i"
- *
+ * @param argList List of arguments for beeline
+ * @param inputStream input stream if any
+ * @param streamType if output from STDERR or STDOUT needs to be returned
* @return The stderr or stdout from running the script, per streamType
+ * @throws Throwable
*/
- private String testCommandLineScript(List<String> argList, InputStream inputStream)
+ private String testCommandLineScript(List<String> argList, InputStream inputStream, OutStream streamType)
throws Throwable {
BeeLine beeLine = new BeeLine();
ByteArrayOutputStream os = new ByteArrayOutputStream();
PrintStream beelineOutputStream = new PrintStream(os);
- beeLine.setOutputStream(beelineOutputStream);
- beeLine.setErrorStream(beelineOutputStream);
+ switch (streamType) {
+ case OUT:
+ beeLine.setOutputStream(beelineOutputStream);
+ break;
+ case ERR:
+ beeLine.setErrorStream(beelineOutputStream);
+ break;
+ default:
+ throw new RuntimeException("Unexpected OutStream type " + streamType);
+ }
String[] args = argList.toArray(new String[argList.size()]);
beeLine.begin(args, inputStream);
String output = os.toString("UTF8");
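The redirect above is the core of the change: exactly one of BeeLine's two streams is pointed at an in-memory buffer, so an assertion only ever sees the stream selected by streamType. A minimal sketch of that capture pattern, using only the BeeLine setters exercised in this patch:

    ByteArrayOutputStream os = new ByteArrayOutputStream();
    PrintStream captured = new PrintStream(os);
    BeeLine beeLine = new BeeLine();
    beeLine.setErrorStream(captured);   // OutStream.ERR; use setOutputStream for OUT
    // ... beeLine.begin(args, inputStream); ...
    String errOutput = os.toString("UTF8");  // contains only stderr content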
@@ -147,33 +174,53 @@ public class TestBeeLineWithArgs {
}
/**
+ * Attempt to execute a simple script file with the -f and -i option to
+ * BeeLine to test for presence of an expected pattern in the output (stdout
+ * or stderr), fail if not found. Print PASSED or FAILED
+ *
+ * @param expectedRegex
+ * Text to look for in command output (stdout)
+ * @param shouldMatch
+ * true if the pattern should be found, false if it should not
+ * @throws Throwable
+ * on command execution error
+ */
+ private void testScriptFile(String scriptText, String expectedRegex,
+ boolean shouldMatch, List<String> argList) throws Throwable {
+ testScriptFile(scriptText, expectedRegex, shouldMatch, argList, true, true, OutStream.OUT);
+ }
+
+ /**
* Attempt to execute a simple script file with the -f and -i option
* to BeeLine to test for presence of an expected pattern
* in the output (stdout or stderr), fail if not found.
* Print PASSED or FAILED
- * @param expectedPattern Text to look for in command output/error
+ * @param expectedRegex Text to look for in command output (stdout or stderr, per outType)
* @param shouldMatch true if the pattern should be found, false if it should not
- * @throws Exception on command execution error
+ * @param argList arguments
+ * @param outType output stream type
+ * @throws Throwable
*/
- private void testScriptFile(String scriptText, String expectedPattern,
- boolean shouldMatch, List<String> argList) throws Throwable {
- testScriptFile(scriptText, expectedPattern, shouldMatch, argList, true, true);
+ private void testScriptFile(String scriptText, String expectedRegex,
+ boolean shouldMatch, List<String> argList, OutStream outType) throws Throwable {
+ testScriptFile(scriptText, expectedRegex, shouldMatch, argList, true, true, outType);
}
-
+
/**
* Attempt to execute a simple script file with the -f or -i option
* to BeeLine (or both) to test for presence of an expected pattern
* in the output (stdout or stderr), fail if not found.
* Print PASSED or FAILED
- * @param expectedPattern Text to look for in command output/error
+ * @param expectedRegex Text to look for in command output/error
* @param shouldMatch true if the pattern should be found, false if it should not
* @param testScript Whether we should test -f
* @param testInit Whether we should test -i
+ * @param streamType Whether match should be done against STDERR or STDOUT
* @throws Exception on command execution error
*/
- private void testScriptFile(String scriptText, String expectedPattern,
+ private void testScriptFile(String scriptText, String expectedRegex,
boolean shouldMatch, List<String> argList,
- boolean testScript, boolean testInit) throws Throwable {
+ boolean testScript, boolean testInit, OutStream streamType) throws Throwable {
// Put the script content in a temp file
File scriptFile = File.createTempFile(this.getClass().getSimpleName(), "temp");
@@ -183,17 +230,20 @@ public class TestBeeLineWithArgs {
os.print(scriptText);
os.close();
+ Pattern expectedPattern = Pattern.compile(".*" + expectedRegex + ".*", Pattern.DOTALL);
if (testScript) {
List<String> copy = new ArrayList<String>(argList);
copy.add("-f");
copy.add(scriptFile.getAbsolutePath());
- String output = testCommandLineScript(copy, null);
- boolean matches = output.contains(expectedPattern);
+ String output = testCommandLineScript(copy, null, streamType);
+
+ Matcher m = expectedPattern.matcher(output);
+ boolean matches = m.matches();
if (shouldMatch != matches) {
//failed
fail("Output" + output + " should" + (shouldMatch ? "" : " not") +
- " contain " + expectedPattern);
+ " contain " + expectedRegex);
}
}
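The switch from String.contains to a compiled pattern is what makes multi-part expectations such as "Number of reducers determined to be.*ELAPSED TIME" possible: wrapping the expected regex in ".*" and compiling with Pattern.DOTALL turns matches() into a "contains a match anywhere, across newlines" check. A small runnable illustration (the sample output text is made up):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class RegexContainsSketch {
      public static void main(String[] args) {
        String output = "INFO  : Number of reducers determined to be: 1\n"
            + "INFO  : Map 1 .......... ELAPSED TIME: 2.1 s\n";
        Pattern p = Pattern.compile(
            ".*" + "Number of reducers determined to be.*ELAPSED TIME" + ".*",
            Pattern.DOTALL);
        Matcher m = p.matcher(output);
        // true; without DOTALL, '.' would stop at the newline and matches() would fail
        System.out.println(m.matches());
      }
    }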
@@ -205,12 +255,13 @@ public class TestBeeLineWithArgs {
copy.add("-i");
copy.add(scriptFile.getAbsolutePath());
- String output = testCommandLineScript(copy, new StringBufferInputStream("!quit\n"));
- boolean matches = output.contains(expectedPattern);
+ String output = testCommandLineScript(copy, new StringBufferInputStream("!quit\n"), streamType);
+ Matcher m = expectedPattern.matcher(output);
+ boolean matches = m.matches();
if (shouldMatch != matches) {
//failed
fail("Output" + output + " should" + (shouldMatch ? "" : " not") +
- " contain " + expectedPattern);
+ " contain " + expectedRegex);
}
}
scriptFile.delete();
@@ -225,14 +276,15 @@ public class TestBeeLineWithArgs {
* @param shouldMatch true if the pattern should be found, false if it should not
* @throws Exception on command execution error
*/
+
private void testCommandEnclosedQuery(String enclosedQuery, String expectedPattern,
- boolean shouldMatch, List<String> argList) throws Throwable {
+ boolean shouldMatch, List<String> argList, OutStream out) throws Throwable {
List<String> copy = new ArrayList<String>(argList);
copy.add("-e");
copy.add(enclosedQuery);
- String output = testCommandLineScript(copy, null);
+ String output = testCommandLineScript(copy, null, out);
boolean matches = output.contains(expectedPattern);
if (shouldMatch != matches) {
//failed
@@ -290,7 +342,7 @@ public class TestBeeLineWithArgs {
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL());
argList.add("--hivevar");
argList.add("DUMMY_TBL=dummy");
- final String SCRIPT_TEXT = "create table ${DUMMY_TBL} (d int);\nshow tables;\n";
+ final String SCRIPT_TEXT = "create table ${DUMMY_TBL} (d int);\nshow tables;\n drop table ${DUMMY_TBL};";
final String EXPECTED_PATTERN = "dummy";
testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
}
@@ -300,7 +352,8 @@ public class TestBeeLineWithArgs {
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL());
argList.add("--hiveconf");
argList.add("test.hive.table.name=dummy");
- final String SCRIPT_TEXT = "create table ${hiveconf:test.hive.table.name} (d int);\nshow tables;\n";
+ final String SCRIPT_TEXT = "create table ${hiveconf:test.hive.table.name} (d int);\nshow tables;\n"
+ + " drop table ${hiveconf:test.hive.table.name};\n";
final String EXPECTED_PATTERN = "dummy";
testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
}
@@ -327,7 +380,9 @@ public class TestBeeLineWithArgs {
argList.add("--hiveconf");
argList.add("COLUMN_TYPE=int");
- final String SCRIPT_TEXT = "${COMMAND} ${OBJECT} ${TABLE_NAME} (${hiveconf:COLUMN_NAME} ${hiveconf:COLUMN_TYPE});\nshow tables;\n";
+ final String SCRIPT_TEXT = "${COMMAND} ${OBJECT} ${TABLE_NAME} "
+ + "(${hiveconf:COLUMN_NAME} ${hiveconf:COLUMN_TYPE});"
+ + "\nshow tables;\n drop ${OBJECT} ${TABLE_NAME};\n";
final String EXPECTED_PATTERN = "dummy2";
testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
}
@@ -348,7 +403,8 @@ public class TestBeeLineWithArgs {
@Test
public void testTabInScriptFile() throws Throwable {
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL());
- final String SCRIPT_TEXT = "CREATE\tTABLE IF NOT EXISTS testTabInScriptFile\n(id\tint);\nSHOW TABLES;";
+ final String SCRIPT_TEXT = "CREATE\tTABLE IF NOT EXISTS testTabInScriptFile\n(id\tint);\nSHOW TABLES;"
+ + "\ndrop table testTabInScriptFile";
final String EXPECTED_PATTERN = "testTabInScriptFile";
testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
}
@@ -537,7 +593,7 @@ public class TestBeeLineWithArgs {
argList.add("--outputformat=tsv");
final String EXPECTED_PATTERN = "Format tsv is deprecated, please use tsv2";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, OutStream.ERR);
}
/**
@@ -551,7 +607,7 @@ public class TestBeeLineWithArgs {
argList.add("--outputformat=csv");
final String EXPECTED_PATTERN = "Format csv is deprecated, please use csv2";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, true, true, OutStream.ERR);
}
/**
@@ -612,7 +668,7 @@ public class TestBeeLineWithArgs {
argList.add(scriptFile.getAbsolutePath());
try {
- String output = testCommandLineScript(argList, null);
+ String output = testCommandLineScript(argList, null, OutStream.OUT);
if (output.contains(EXPECTED_PATTERN)) {
fail("Output: " + output + " Negative pattern: " + EXPECTED_PATTERN);
}
@@ -651,7 +707,7 @@ public class TestBeeLineWithArgs {
@Test
public void testHiveVarSubstitution() throws Throwable {
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL() + "#D_TBL=dummy_t");
- final String SCRIPT_TEXT = "create table ${D_TBL} (d int);\nshow tables;\n";
+ final String SCRIPT_TEXT = "create table ${D_TBL} (d int);\nshow tables;\ndrop table ${D_TBL};\n";
final String EXPECTED_PATTERN = "dummy_t";
testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
}
@@ -665,7 +721,7 @@ public class TestBeeLineWithArgs {
// Set to non-zk lock manager to avoid trying to connect to zookeeper
final String SCRIPT_TEXT =
"set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n" +
- "create table ${DUMMY_TBL} (d int);\nshow tables;\n";
+ "create table ${DUMMY_TBL} (d int);\nshow tables;\n drop table ${DUMMY_TBL};\n";
final String EXPECTED_PATTERN = "embedded_table";
testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
}
@@ -678,8 +734,10 @@ public class TestBeeLineWithArgs {
public void testQueryProgress() throws Throwable {
final String SCRIPT_TEXT = "set hive.support.concurrency = false;\n" +
"select count(*) from " + tableName + ";\n";
- final String EXPECTED_PATTERN = "number of splits";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, getBaseArgs(miniHS2.getBaseJdbcURL()));
+ // Check for part of the log message as well as part of the progress information
+ final String EXPECTED_PATTERN = "Number of reducers determined to be.*ELAPSED TIME";
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, getBaseArgs(miniHS2.getBaseJdbcURL()),
+ OutStream.ERR);
}
/**
@@ -692,8 +750,10 @@ public class TestBeeLineWithArgs {
final String SCRIPT_TEXT = "set hive.support.concurrency = false;\n" +
"set hive.exec.parallel = true;\n" +
"select count(*) from " + tableName + ";\n";
- final String EXPECTED_PATTERN = "number of splits";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, getBaseArgs(miniHS2.getBaseJdbcURL()));
+ // Check for part of the log message as well as part of the progress information
+ final String EXPECTED_PATTERN = "Number of reducers determined to be.*ELAPSED TIME";
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, getBaseArgs(miniHS2.getBaseJdbcURL()),
+ OutStream.ERR);
}
/**
@@ -706,7 +766,7 @@ public class TestBeeLineWithArgs {
"!set silent true\n" +
"select count(*) from " + tableName + ";\n";
final String EXPECTED_PATTERN = "Executing command";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, false, getBaseArgs(miniHS2.getBaseJdbcURL()));
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, false, getBaseArgs(miniHS2.getBaseJdbcURL()), OutStream.ERR);
}
@Test
@@ -727,10 +787,10 @@ public class TestBeeLineWithArgs {
+"(key int);show tables; --multicommands in one line";
final String EXPECTED_PATTERN = " multicmdtbl ";
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL());
- testCommandEnclosedQuery(QUERY_TEXT, EXPECTED_PATTERN, true, argList);
+ testCommandEnclosedQuery(QUERY_TEXT, EXPECTED_PATTERN, true, argList, OutStream.OUT);
final String QUERY_TEXT_DROP = "drop table multiCmdTbl;show tables;";
- testCommandEnclosedQuery(QUERY_TEXT_DROP, EXPECTED_PATTERN, false, argList);
+ testCommandEnclosedQuery(QUERY_TEXT_DROP, EXPECTED_PATTERN, false, argList, OutStream.OUT);
}
@Test
@@ -765,10 +825,10 @@ public class TestBeeLineWithArgs {
+ " TERMINATED BY '\\n';show tables;";
final String EXPECTED_PATTERN = " multicmdtbl ";
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL());
- testCommandEnclosedQuery(QUERY_TEXT, EXPECTED_PATTERN, true, argList);
+ testCommandEnclosedQuery(QUERY_TEXT, EXPECTED_PATTERN, true, argList, OutStream.OUT);
final String QUERY_TEXT_DROP = "drop table multiCmdTbl;show tables;";
- testCommandEnclosedQuery(QUERY_TEXT_DROP, EXPECTED_PATTERN, false, argList);
+ testCommandEnclosedQuery(QUERY_TEXT_DROP, EXPECTED_PATTERN, false, argList, OutStream.OUT);
}
@Test
@@ -779,9 +839,10 @@ public class TestBeeLineWithArgs {
final String SCRIPT_TEXT = "set hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n"
+ "set hive.compute.query.using.stats=false;\n"
+ "create table if not exists embeddedBeelineOutputs(d int);\n"
- + "set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
+ + "set a=1;\nselect count(*) from embeddedBeelineOutputs;\n"
+ + "drop table embeddedBeelineOutputs;\n";
final String EXPECTED_PATTERN = "Stage-1 map =";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, OutStream.ERR);
}
@Test
@@ -806,7 +867,7 @@ public class TestBeeLineWithArgs {
argList.add(BeeLine.BEELINE_DEFAULT_JDBC_DRIVER);
final String SCRIPT_TEXT =
- "create table blueconnecttest (d int);\nshow tables;\n";
+ "create table blueconnecttest (d int);\nshow tables;\ndrop table blueconnecttest;\n";
final String EXPECTED_PATTERN = "blueconnecttest";
// We go through these hijinxes because java considers System.getEnv
@@ -826,7 +887,7 @@ public class TestBeeLineWithArgs {
};
BeeLineOpts.setEnv(newEnv);
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, true, false);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, true, false, OutStream.OUT);
}
/**
@@ -839,10 +900,10 @@ public class TestBeeLineWithArgs {
final String SCRIPT_TEXT =
"!close\n" +
"!reconnect\n\n\n" +
- "create table reconnecttest (d int);\nshow tables;\n";
+ "create table reconnecttest (d int);\nshow tables;\ndrop table reconnecttest;\n";
final String EXPECTED_PATTERN = "reconnecttest";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, true, false);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, true, false, OutStream.OUT);
}
@@ -894,7 +955,7 @@ public class TestBeeLineWithArgs {
*/
@Test
public void testShowDbInPrompt() throws Throwable {
- final String EXPECTED_PATTERN = " (default)>";
+ final String EXPECTED_PATTERN = " \\(default\\)>";
List<String> argList = new ArrayList<String>();
argList.add("--showDbInPrompt");
argList.add("-u");
@@ -909,7 +970,7 @@ public class TestBeeLineWithArgs {
List<String> argList = new ArrayList<String>();
final String SCRIPT_TEXT = "!sh echo hello world";
final String EXPECTED_PATTERN = "hello world";
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList,true,false);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, true, false, OutStream.OUT);
}
/**
@@ -924,6 +985,6 @@ public class TestBeeLineWithArgs {
final String EXPECTED_PATTERN = "2 rows selected";
List<String> argList = getBaseArgs(miniHS2.getBaseJdbcURL());
argList.add("--force");
- testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
+ testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList, OutStream.ERR);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/60a36d12/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
index e98406d..388486d 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/service/cli/operation/TestOperationLoggingAPIWithTez.java
@@ -58,8 +58,6 @@ public class TestOperationLoggingAPIWithTez extends OperationLoggingAPITestBase
};
hiveConf = new HiveConf();
hiveConf.set(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL.varname, "verbose");
- // Change the engine to tez
- hiveConf.setVar(ConfVars.HIVE_EXECUTION_ENGINE, "tez");
// Set tez execution summary to false.
hiveConf.setBoolVar(ConfVars.TEZ_EXEC_SUMMARY, false);
miniHS2 = new MiniHS2(hiveConf, MiniClusterType.TEZ);
[05/50] [abbrv] hive git commit: HIVE-15877 : Upload dependency jars
for druid storage handler (Slim Bouguerra via Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-15877 : Upload dependency jars for druid storage handler (Slim Bouguerra via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ef61a9bc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ef61a9bc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ef61a9bc
Branch: refs/heads/hive-14535
Commit: ef61a9bcfa90bd359e1af028af873fb0ccf51deb
Parents: f3790ce
Author: Slim Bouguerra <sl...@gmail.com>
Authored: Fri Feb 10 11:24:00 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Feb 16 17:11:54 2017 -0800
----------------------------------------------------------------------
.../hadoop/hive/druid/DruidStorageHandler.java | 7 ++-
.../hive/druid/DruidStorageHandlerUtils.java | 64 +++++++++++++++-----
.../hive/metastore/HiveMetaStoreClient.java | 5 +-
3 files changed, 60 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ef61a9bc/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
index cff0056..d4f6865 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.io.DruidOutputFormat;
import org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat;
+import org.apache.hadoop.hive.druid.io.DruidRecordWriter;
import org.apache.hadoop.hive.druid.serde.DruidSerDe;
import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
@@ -520,7 +521,11 @@ public class DruidStorageHandler extends DefaultHiveMetaHook implements HiveStor
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
-
+ try {
+ DruidStorageHandlerUtils.addDependencyJars(jobConf, DruidRecordWriter.class);
+ } catch (IOException e) {
+ Throwables.propagate(e);
+ }
}
@Override
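The one-line intent of the hunk above: at job-configuration time, the jar containing the Druid record writer is located on the local filesystem and appended to the job's "tmpjars" list, so task JVMs can load it. A sketch of the effect, using only names from this patch (the printed value is illustrative):

    JobConf jobConf = new JobConf();
    DruidStorageHandlerUtils.addDependencyJars(jobConf, DruidRecordWriter.class);
    System.out.println(jobConf.get("tmpjars"));
    // e.g. file:/.../hive-druid-handler-<version>.jar  (illustrative path)

Guava's Throwables.propagate wraps the checked IOException in a RuntimeException and throws it, which is why configureJobConf needs no throws clause.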
http://git-wip-us.apache.org/repos/asf/hive/blob/ef61a9bc/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
----------------------------------------------------------------------
diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
index 52e7e8d..8d48e14 100644
--- a/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
+++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/DruidStorageHandlerUtils.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.druid;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.dataformat.smile.SmileFactory;
-import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Interner;
@@ -28,13 +27,10 @@ import com.google.common.collect.Interners;
import com.google.common.collect.Lists;
import com.google.common.io.CharStreams;
import com.metamx.common.MapUtils;
-import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.emitter.EmittingLogger;
import com.metamx.emitter.core.NoopEmitter;
import com.metamx.emitter.service.ServiceEmitter;
import com.metamx.http.client.HttpClient;
-import com.metamx.http.client.HttpClientConfig;
-import com.metamx.http.client.HttpClientInit;
import com.metamx.http.client.Request;
import com.metamx.http.client.response.InputStreamResponseHandler;
import io.druid.jackson.DefaultObjectMapper;
@@ -51,14 +47,13 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryProxy;
+import org.apache.hadoop.util.StringUtils;
import org.jboss.netty.handler.codec.http.HttpHeaders;
import org.jboss.netty.handler.codec.http.HttpMethod;
-import org.joda.time.Period;
import org.skife.jdbi.v2.FoldController;
import org.skife.jdbi.v2.Folder3;
import org.skife.jdbi.v2.Handle;
@@ -67,31 +62,46 @@ import org.skife.jdbi.v2.TransactionCallback;
import org.skife.jdbi.v2.TransactionStatus;
import org.skife.jdbi.v2.tweak.HandleCallback;
import org.skife.jdbi.v2.util.ByteArrayMapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
import java.net.InetAddress;
-import java.net.URI;
import java.net.URL;
+import java.net.URLDecoder;
import java.net.UnknownHostException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import static org.apache.hadoop.hive.ql.exec.Utilities.jarFinderGetJar;
/**
* Utils class for Druid storage handler.
*/
public final class DruidStorageHandlerUtils {
+ private static final Logger LOG = LoggerFactory.getLogger(DruidStorageHandlerUtils.class);
+
private static final String SMILE_CONTENT_TYPE = "application/x-jackson-smile";
+
/**
* Mapper to use to serialize/deserialize Druid objects (JSON)
*/
@@ -190,18 +200,17 @@ public final class DruidStorageHandlerUtils {
return response;
}
-
public static String getURL(HttpClient client, URL url) throws IOException {
try (Reader reader = new InputStreamReader(
DruidStorageHandlerUtils.submitRequest(client, new Request(HttpMethod.GET, url)))) {
- return CharStreams.toString(reader);
+ return CharStreams.toString(reader);
}
}
/**
* @param taskDir path to the directory containing the segments descriptor info
- * the descriptor path will be .../workingPath/task_id/{@link DruidStorageHandler#SEGMENTS_DESCRIPTOR_DIR_NAME}/*.json
- * @param conf hadoop conf to get the file system
+ * the descriptor path will be .../workingPath/task_id/{@link DruidStorageHandler#SEGMENTS_DESCRIPTOR_DIR_NAME}/*.json
+ * @param conf hadoop conf to get the file system
*
* @return List of DataSegments
*
@@ -290,7 +299,8 @@ public final class DruidStorageHandlerUtils {
public ArrayList<String> fold(ArrayList<String> druidDataSources,
Map<String, Object> stringObjectMap,
FoldController foldController,
- StatementContext statementContext) throws SQLException {
+ StatementContext statementContext
+ ) throws SQLException {
druidDataSources.add(
MapUtils.getString(stringObjectMap, "datasource")
);
@@ -431,4 +441,30 @@ public final class DruidStorageHandlerUtils {
public interface DataPusher {
long push() throws IOException;
}
+
+ // Thanks, HBase Storage handler
+ public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
+ FileSystem localFs = FileSystem.getLocal(conf);
+ Set<String> jars = new HashSet<String>();
+ jars.addAll(conf.getStringCollection("tmpjars"));
+ for (Class<?> clazz : classes) {
+ if (clazz == null) {
+ continue;
+ }
+ String path = Utilities.jarFinderGetJar(clazz);
+ if (path == null) {
+ throw new RuntimeException(
+ "Could not find jar for class " + clazz + " in order to ship it to the cluster.");
+ }
+ if (!localFs.exists(new Path(path))) {
+ throw new RuntimeException("Could not validate jar file " + path + " for class " + clazz);
+ }
+ jars.add(path);
+ }
+ if (jars.isEmpty()) {
+ return;
+ }
+ conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
+ }
+
}
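The merge semantics of addDependencyJars, isolated into a runnable fragment (hadoop-common only; the paths are made up): existing "tmpjars" entries are preserved, new paths are added, and the set is written back comma-separated and de-duplicated.

    import java.util.HashSet;
    import java.util.Set;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.StringUtils;

    public class TmpJarsSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("tmpjars", "file:/tmp/a.jar");
        Set<String> jars = new HashSet<String>(conf.getStringCollection("tmpjars"));
        jars.add("file:/tmp/b.jar");   // a newly discovered dependency
        jars.add("file:/tmp/a.jar");   // duplicates collapse in the Set
        conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
        System.out.println(conf.get("tmpjars"));
        // file:/tmp/a.jar,file:/tmp/b.jar  (order unspecified)
      }
    }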
http://git-wip-us.apache.org/repos/asf/hive/blob/ef61a9bc/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index c32104f..70f3a6b 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -739,7 +739,10 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
hook.commitCreateTable(tbl);
}
success = true;
- } finally {
+ } catch (Exception e) {
+ LOG.error("Got exception from createTable", e);
+ }
+ finally {
if (!success && (hook != null)) {
hook.rollbackCreateTable(tbl);
}
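One subtlety of the new catch block is worth spelling out: because the exception is logged rather than rethrown, success stays false, so the existing finally branch still drives hook.rollbackCreateTable. A self-contained sketch of that control flow (doCreate and rollback are hypothetical stand-ins for the metastore calls):

    public class RollbackFlowSketch {
      static void doCreate() throws Exception { throw new Exception("boom"); } // stand-in
      static void rollback() { System.out.println("rollbackCreateTable invoked"); } // stand-in

      public static void main(String[] args) {
        boolean success = false;
        try {
          doCreate();
          success = true;
        } catch (Exception e) {
          System.err.println("Got exception from createTable: " + e); // logged, not rethrown
        } finally {
          if (!success) {
            rollback(); // still runs: success never became true
          }
        }
      }
    }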
[22/50] [abbrv] hive git commit: HIVE-15874: Invalid position alias
in Group By when CBO failed (Walter Wu, reviewed by Pengcheng Xiong)
Posted by se...@apache.org.
HIVE-15874: Invalid position alias in Group By when CBO failed (Walter Wu, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/08ca7b2d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/08ca7b2d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/08ca7b2d
Branch: refs/heads/hive-14535
Commit: 08ca7b2de8e50699b759d9b86eb8daaa59f580e9
Parents: 091ac8e
Author: Pengcheng Xiong <px...@apache.org>
Authored: Mon Feb 20 12:33:24 2017 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Mon Feb 20 12:33:24 2017 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 2 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +-
ql/src/test/queries/clientpositive/masking_10.q | 25 ++
.../clientpositive/position_alias_test_1.q | 18 ++
.../results/clientpositive/masking_10.q.out | 244 +++++++++++++++++++
.../clientpositive/position_alias_test_1.q.out | 148 +++++++++++
6 files changed, 443 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/08ca7b2d/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e7687be..10f16ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -297,6 +297,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
public RelNode genLogicalPlan(ASTNode ast) throws SemanticException {
LOG.info("Starting generating logical plan");
PreCboCtx cboCtx = new PreCboCtx();
+ // position alias processing moved here (out of genResolvedParseTree)
+ processPositionAlias(ast);
if (!genResolvedParseTree(ast, cboCtx)) {
return null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/08ca7b2d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9c37af8..9eafb0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10903,7 +10903,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
ctesExpanded = new ArrayList<String>();
// 1. analyze and process the position alias
- processPositionAlias(ast);
+ // processPositionAlias has been moved out of genResolvedParseTree
// 2. analyze create table command
if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
@@ -11019,6 +11019,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
// 1. Generate Resolved Parse tree from syntax tree
LOG.info("Starting Semantic Analysis");
+ // position alias processing moved here (out of genResolvedParseTree)
+ processPositionAlias(ast);
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
@@ -11032,6 +11034,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (tree != ast) {
ctx.setSkipTableMasking(true);
init(true);
+ // position alias processing moved here (out of genResolvedParseTree)
+ processPositionAlias(tree);
genResolvedParseTree(tree, plannerCtx);
if (this instanceof CalcitePlanner) {
((CalcitePlanner) this).resetCalciteConfiguration();
@@ -12258,7 +12262,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
// Process the position alias in GROUPBY and ORDERBY
- private void processPositionAlias(ASTNode ast) throws SemanticException {
+ public void processPositionAlias(ASTNode ast) throws SemanticException {
boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS);
boolean isGbyByPos = isBothByPos
|| HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_POSITION_ALIAS);
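For readers unfamiliar with position aliases: with hive.groupby.position.alias=true, integer literals in GROUP BY/ORDER BY name select-list positions, and processPositionAlias substitutes the corresponding expressions into the AST before analysis. A toy Java illustration of the substitution (not Hive's AST code), using the select list from masking_10.q below:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class PositionAliasSketch {
      public static void main(String[] args) {
        // select 2017 as a, value ... group by 1, 2
        List<String> selectExprs = Arrays.asList("2017 as a", "value");
        List<Object> groupBy = Arrays.<Object>asList(1, 2);
        List<String> resolved = new ArrayList<String>();
        for (Object g : groupBy) {
          resolved.add(g instanceof Integer
              ? selectExprs.get((Integer) g - 1)  // positional alias -> select expression
              : g.toString());                    // ordinary expression passes through
        }
        System.out.println(resolved); // [2017 as a, value]
      }
    }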
http://git-wip-us.apache.org/repos/asf/hive/blob/08ca7b2d/ql/src/test/queries/clientpositive/masking_10.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_10.q b/ql/src/test/queries/clientpositive/masking_10.q
new file mode 100644
index 0000000..e933253
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_10.q
@@ -0,0 +1,25 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+drop table masking_test;
+
+create temporary table masking_test as select cast(key as int) as key, value from src;
+
+set hive.groupby.position.alias = true;
+set hive.cbo.enable=true;
+
+explain select 2017 as a, value from masking_test group by 1, 2;
+
+select 2017 as a, value from masking_test group by 1, 2;
+
+explain
+select * from
+ masking_test alias01
+ left join
+ (
+ select 2017 as a, value from masking_test group by 1, 2
+ ) alias02
+ on alias01.key = alias02.a
+ left join
+ masking_test alias03
+on alias01.key = alias03.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/08ca7b2d/ql/src/test/queries/clientpositive/position_alias_test_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/position_alias_test_1.q b/ql/src/test/queries/clientpositive/position_alias_test_1.q
new file mode 100644
index 0000000..599bc08
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/position_alias_test_1.q
@@ -0,0 +1,18 @@
+create table alias_test_01(a INT, b STRING) ;
+ create table alias_test_02(a INT, b STRING) ;
+ create table alias_test_03(a INT, b STRING) ;
+ set hive.groupby.position.alias = true;
+ set hive.cbo.enable=true;
+
+
+ explain
+ select * from
+ alias_test_01 alias01
+ left join
+ (
+ select 2017 as a, b from alias_test_02 group by 1, 2
+ ) alias02
+ on alias01.a = alias02.a
+ left join
+ alias_test_03 alias03
+ on alias01.a = alias03.a;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/08ca7b2d/ql/src/test/results/clientpositive/masking_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_10.q.out b/ql/src/test/results/clientpositive/masking_10.q.out
new file mode 100644
index 0000000..d6293e3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/masking_10.q.out
@@ -0,0 +1,244 @@
+PREHOOK: query: drop table masking_test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table masking_test
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create temporary table masking_test as select cast(key as int) as key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create temporary table masking_test as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+PREHOOK: query: explain select 2017 as a, value from masking_test group by 1, 2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select 2017 as a, value from masking_test group by 1, 2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: reverse(value) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 2017 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 2017 as a, value from masking_test group by 1, 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select 2017 as a, value from masking_test group by 1, 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+2017 0_lav
+2017 2_lav
+2017 4_lav
+2017 8_lav
+Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain
+select * from
+ masking_test alias01
+ left join
+ (
+ select 2017 as a, value from masking_test group by 1, 2
+ ) alias02
+ on alias01.key = alias02.a
+ left join
+ masking_test alias03
+on alias01.key = alias03.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from
+ masking_test alias01
+ left join
+ (
+ select 2017 as a, value from masking_test group by 1, 2
+ ) alias02
+ on alias01.key = alias02.a
+ left join
+ masking_test alias03
+on alias01.key = alias03.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: reverse(value) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 2017 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(VALUE._col0 = 2017)}
+ 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 3403 Data size: 75629 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3403 Data size: 75629 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 3743 Data size: 83191 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3743 Data size: 83191 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/08ca7b2d/ql/src/test/results/clientpositive/position_alias_test_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/position_alias_test_1.q.out b/ql/src/test/results/clientpositive/position_alias_test_1.q.out
new file mode 100644
index 0000000..9053bf1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/position_alias_test_1.q.out
@@ -0,0 +1,148 @@
+PREHOOK: query: create table alias_test_01(a INT, b STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alias_test_01
+POSTHOOK: query: create table alias_test_01(a INT, b STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alias_test_01
+PREHOOK: query: create table alias_test_02(a INT, b STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alias_test_02
+POSTHOOK: query: create table alias_test_02(a INT, b STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alias_test_02
+PREHOOK: query: create table alias_test_03(a INT, b STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alias_test_03
+POSTHOOK: query: create table alias_test_03(a INT, b STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alias_test_03
+PREHOOK: query: explain
+ select * from
+ alias_test_01 alias01
+ left join
+ (
+ select 2017 as a, b from alias_test_02 group by 1, 2
+ ) alias02
+ on alias01.a = alias02.a
+ left join
+ alias_test_03 alias03
+ on alias01.a = alias03.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+ select * from
+ alias_test_01 alias01
+ left join
+ (
+ select 2017 as a, b from alias_test_02 group by 1, 2
+ ) alias02
+ on alias01.a = alias02.a
+ left join
+ alias_test_03 alias03
+ on alias01.a = alias03.a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alias_test_02
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: b (type: string)
+ outputColumnNames: b
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: 2017 (type: int), b (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: 2017 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: 2017 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: 2017 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 2017 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: alias01
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: b (type: string)
+ TableScan
+ alias: alias03
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: b (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 a (type: int)
+ 1 _col0 (type: int)
+ 2 a (type: int)
+ outputColumnNames: _col0, _col1, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
[40/50] [abbrv] hive git commit: HIVE-15955: make explain formatted
to include opId and etc (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
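The q.out churn below is easier to read knowing what actually changed: every operator node in the formatted-explain JSON gains an "OperatorId:" field (and ReduceSink nodes an "OutputOperators:" field). A minimal Jackson sketch that walks such a plan and collects the ids; the inline JSON is a hand-shortened stand-in for a real plan:

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import java.util.ArrayList;
    import java.util.List;

    public class ExplainOpIdSketch {
      static void collect(JsonNode node, List<String> ids) {
        if (node.isObject()) {
          JsonNode id = node.get("OperatorId:");   // note the trailing colon in the key
          if (id != null) ids.add(id.asText());
          node.fields().forEachRemaining(e -> collect(e.getValue(), ids));
        } else if (node.isArray()) {
          node.forEach(child -> collect(child, ids));
        }
      }

      public static void main(String[] args) throws Exception {
        String plan = "{\"TableScan\":{\"OperatorId:\":\"TS_0\","
            + "\"children\":{\"Select Operator\":{\"OperatorId:\":\"SEL_3\"}}}}";
        List<String> ids = new ArrayList<String>();
        collect(new ObjectMapper().readTree(plan), ids);
        System.out.println(ids); // [TS_0, SEL_3]
      }
    }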
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/vector_outer_join3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
index 49c658b..1d2abee 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -242,7 +242,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1
) t1
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE
","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0
":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true",
"Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true
","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"
Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statist
ics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 6]"},"Stat
istics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:"
:"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className
:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32","OutputOperators:":"[GBY_15]"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:"
:"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Colum
n stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}}
PREHOOK: query: select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -282,7 +282,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1
) t1
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Colum
n stats: NONE","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0
to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS tr
ue","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySort
ableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Ma
p Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":
"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColum
ns:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vec
torization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink V
ectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32","OutputOperators:":"[GBY_15]"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCoun
t:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 B
asic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}}
PREHOOK: query: select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
@@ -322,7 +322,7 @@ left outer join small_alltypesorc_a hd
on hd.cstring1 = c.cstring1 and hd.cint = c.cint
) t1
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":
"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"tr
ue","projectedOutputColumns:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type
: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort orde
r:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2,
3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:
":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"}
,"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstrin
g1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"
Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator",
"vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32","OutputOperators:":"[GBY_15]"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"tru
e","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":
"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}}
PREHOOK: query: select count(*) from (select c.cstring1
from small_alltypesorc_a c
left outer join small_alltypesorc_a cd
http://git-wip-us.apache.org/repos/asf/hive/blob/759766ee/ql/src/test/results/clientpositive/vector_outer_join4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
index fce35a1..a73a5e1 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
@@ -256,7 +256,7 @@ from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
on cd.cint = c.cint
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","
_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":
"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statisti
cs:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint",
"csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint","bigint","bigint","double","double","string","string","timestamp","timestamp","bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_co
l7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","outputColumnNames:":["_col0","_col1","_col2","_col3","_col4",
"_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11
","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:
":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":["bigint","bigint","bigint","bigint","double","double","string","string","timestamp","timestamp","bigint","bigint"]}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}}
PREHOOK: query: select *
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
@@ -337,7 +337,7 @@ from small_alltypesorc_b c
left outer join small_alltypesorc_b hd
on hd.ctinyint = c.ctinyint
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats:
NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"out
putColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint"
,"csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_10"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Sta
tistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_12","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Tabl
e and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_13","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_14"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNat
ive:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_15"}}}}}}
PREHOOK: query: select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b hd
@@ -780,7 +780,7 @@ left outer join small_alltypesorc_b hd
on hd.ctinyint = c.ctinyint
) t1
POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE
","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:"
:{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS
true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys I
S true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work"
:{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{}}}}}}
+{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statist
ics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 2]"},"
Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"class
Name:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"cla
ssName:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","Not ACID UPDATE or DELETE IS true","No buckets IS true","No TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false","Uniform Hash IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32","OutputOperators:":"[GBY_15]"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeCol
umns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}}
PREHOOK: query: select count(*) from (select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
[45/50] [abbrv] hive git commit: HIVE-15964: LLAP: Llap IO codepath
not getting invoked due to file column id mismatch (Rajesh Balamohan,
reviewed by Prasanth Jayachandran, Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-15964: LLAP: Llap IO codepath not getting invoked due to file column id mismatch (Rajesh Balamohan, reviewed by Prasanth Jayachandran, Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/539d3c62
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/539d3c62
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/539d3c62
Branch: refs/heads/hive-14535
Commit: 539d3c6264dc8ae462e906a390dcb1d45a63422e
Parents: 53f0335
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Fri Feb 24 03:50:09 2017 +0530
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Fri Feb 24 03:50:09 2017 +0530
----------------------------------------------------------------------
.../hive/llap/io/api/impl/LlapRecordReader.java | 11 +-
.../test/queries/clientpositive/llap_reader.q | 40 +++++
.../clientpositive/llap/llap_reader.q.out | 167 +++++++++++++++++++
.../results/clientpositive/llap_reader.q.out | 86 ++++++++++
4 files changed, 301 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index 9b1a905..d4e14a8 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -93,6 +93,8 @@ class LlapRecordReader
private SchemaEvolution evolution;
+ private final boolean isAcidScan;
+
public LlapRecordReader(JobConf job, FileSplit split, List<Integer> includedCols,
String hostName, ColumnVectorProducer cvp, ExecutorService executor,
InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe, Reporter reporter)
@@ -139,7 +141,7 @@ class LlapRecordReader
partitionValues = null;
}
- boolean isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
+ isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(
job, isAcidScan, Integer.MAX_VALUE);
@@ -169,8 +171,11 @@ class LlapRecordReader
private boolean checkOrcSchemaEvolution() {
for (int i = 0; i < columnCount; ++i) {
- int colId = columnIds == null ? i : columnIds.get(i);
- if (!evolution.isPPDSafeConversion(colId)) {
+ int projectedColId = columnIds == null ? i : columnIds.get(i);
+ // Adjust file column index for ORC struct.
+ // LLAP IO does not support ACID yet; when it does, this will be adjusted automatically.
+ int fileColId = OrcInputFormat.getRootColumn(!isAcidScan) + projectedColId + 1;
+ if (!evolution.isPPDSafeConversion(fileColId)) {
LlapIoImpl.LOG.warn("Unsupported schema evolution! Disabling Llap IO for {}", split);
return false;
}
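The core of the fix is the translation from a projected (reader-side) column index to the file-side ORC type id before the PPD-safety check: previously the check was handed the reader-side index directly, so it compared against file type ids that are offset by the root struct, making harmless widenings look like unsupported schema evolution and silently disabling LLAP IO. A minimal, self-contained sketch of that arithmetic, assuming the non-ACID case the comment in the diff describes (the root struct is type id 0 and its children start at 1); FileColumnIdSketch, ROOT_COLUMN, and toFileColumnId are hypothetical stand-ins for OrcInputFormat.getRootColumn(!isAcidScan) and the inline expression in the patch:

public class FileColumnIdSketch {
    // Non-ACID ORC: the root struct is type id 0 (stand-in for the value of
    // OrcInputFormat.getRootColumn(!isAcidScan) in the real code).
    private static final int ROOT_COLUMN = 0;

    // The +1 steps past the root struct itself to its first child, so
    // projected column i lands on the i-th top-level field of the file schema.
    static int toFileColumnId(int projectedColId) {
        return ROOT_COLUMN + projectedColId + 1;
    }

    public static void main(String[] args) {
        System.out.println(toFileColumnId(0)); // 1: first top-level column
        System.out.println(toFileColumnId(2)); // 3: third top-level column
    }
}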
http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/ql/src/test/queries/clientpositive/llap_reader.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/llap_reader.q b/ql/src/test/queries/clientpositive/llap_reader.q
new file mode 100644
index 0000000..ac0624d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/llap_reader.q
@@ -0,0 +1,40 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.llap.io.enabled=true;
+SET hive.map.aggr=false;
+SET hive.exec.post.hooks=;
+
+CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc;
+INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+
+ALTER TABLE test CHANGE f1 f1 bigint;
+ALTER TABLE test CHANGE f2 f2 bigint;
+ALTER TABLE test CHANGE f3 f3 bigint;
+
+-- llap counters with data and meta cache
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+SELECT count(f1) FROM test GROUP BY f1;
+SELECT count(f1) FROM test GROUP BY f1;
+
+SET hive.exec.post.hooks=;
+CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc;
+INSERT OVERWRITE TABLE test_bigint select * from test;
+ALTER TABLE test_bigint CHANGE f1 f1 double;
+ALTER TABLE test_bigint CHANGE f2 f2 double;
+ALTER TABLE test_bigint CHANGE f3 f3 double;
+
+-- llap counters with meta cache alone
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+select count(f1) from test_bigint group by f1;
+select count(f1) from test_bigint group by f1;
+
+
+-- Check with ACID table
+SET hive.exec.post.hooks=;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.support.concurrency=true;
+CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4');
+
+-- should not have llap counters
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+SELECT count(f1) FROM test_acid GROUP BY f1;
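Per the "-- llap counters ..." comments in the test above, the three scenarios pin down when the LLAP IO path engages: int-to-bigint reads go through both the data and metadata caches, bigint-to-double reads touch the metadata cache alone, and the ACID table bypasses LLAP IO entirely. A small illustrative sketch summarizing those expectations (the class and method are hypothetical, written only to restate the test's comments, not Hive's actual decision logic):

public class LlapReaderExpectations {
    // Summarizes the "-- llap counters ..." comments in llap_reader.q;
    // this is not Hive's real SchemaEvolution or ACID check.
    static String expectedCounters(String fileType, String readerType, boolean acid) {
        if (acid) {
            return "no LLAP IO counters";            // ACID scan bypasses LLAP IO
        }
        if ("int".equals(fileType) && "bigint".equals(readerType)) {
            return "data + metadata cache counters"; // first scenario
        }
        if ("bigint".equals(fileType) && "double".equals(readerType)) {
            return "metadata cache counters only";   // second scenario
        }
        return "unspecified";
    }

    public static void main(String[] args) {
        System.out.println(expectedCounters("int", "bigint", false));
        System.out.println(expectedCounters("bigint", "double", false));
        System.out.println(expectedCounters("int", "int", true));
    }
}

The golden files that follow bear this out: the first run against test shows ALLOCATED_BYTES and CACHE_MISS_BYTES, the second shows CACHE_HIT_BYTES; the test_bigint runs show only METADATA_CACHE_MISS/HIT; and the test_acid run has no LLAP IO COUNTERS section at all.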
http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/ql/src/test/results/clientpositive/llap/llap_reader.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_reader.q.out b/ql/src/test/results/clientpositive/llap/llap_reader.q.out
new file mode 100644
index 0000000..cc556a9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/llap_reader.q.out
@@ -0,0 +1,167 @@
+PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 358
+ HDFS_BYTES_WRITTEN: 143
+ HDFS_READ_OPS: 6
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 4
+ RECORDS_OUT_0: 4
+ RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+ ALLOCATED_BYTES: 262144
+ ALLOCATED_USED_BYTES: 4
+ CACHE_MISS_BYTES: 7
+ METADATA_CACHE_MISS: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 4
+ SELECTED_ROWGROUPS: 1
+1
+1
+1
+1
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 0
+ HDFS_BYTES_WRITTEN: 143
+ HDFS_READ_OPS: 2
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 4
+ RECORDS_OUT_0: 4
+ RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+ CACHE_HIT_BYTES: 7
+ CACHE_MISS_BYTES: 0
+ METADATA_CACHE_HIT: 2
+ NUM_DECODED_BATCHES: 1
+ NUM_VECTOR_BATCHES: 1
+ ROWS_EMITTED: 4
+ SELECTED_ROWGROUPS: 1
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: INSERT OVERWRITE TABLE test_bigint select * from test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f1 f1 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f2 f2 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f3 f3 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 595
+ HDFS_BYTES_WRITTEN: 143
+ HDFS_READ_OPS: 6
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 4
+ RECORDS_OUT_0: 4
+ RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+ METADATA_CACHE_MISS: 1
+1
+1
+1
+1
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 323
+ HDFS_BYTES_WRITTEN: 143
+ HDFS_READ_OPS: 4
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 4
+ RECORDS_OUT_0: 4
+ RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+ METADATA_CACHE_HIT: 1
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_acid
+PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_acid
+PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_acid
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+ HDFS_BYTES_READ: 1567
+ HDFS_BYTES_WRITTEN: 143
+ HDFS_READ_OPS: 12
+ HDFS_LARGE_READ_OPS: 0
+ HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+ CREATED_FILES: 1
+ DESERIALIZE_ERRORS: 0
+ RECORDS_IN_Map_1: 4
+ RECORDS_OUT_0: 4
+ RECORDS_OUT_INTERMEDIATE_Map_1: 4
+1
+1
+1
+1
http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/ql/src/test/results/clientpositive/llap_reader.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap_reader.q.out b/ql/src/test/results/clientpositive/llap_reader.q.out
new file mode 100644
index 0000000..dcbd3aa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap_reader.q.out
@@ -0,0 +1,86 @@
+PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: INSERT OVERWRITE TABLE test_bigint select * from test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f1 f1 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f2 f2 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f3 f3 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_acid
+PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_acid
+PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_acid
+#### A masked pattern was here ####
+1
+1
+1
+1
[13/50] [abbrv] hive git commit: Addendum to HIVE-15957
Posted by se...@apache.org.
Addendum to HIVE-15957
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5c293716
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5c293716
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5c293716
Branch: refs/heads/hive-14535
Commit: 5c293716a37c57138da642803eee68212dd418ed
Parents: 368d916
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Fri Feb 17 14:07:38 2017 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Feb 17 14:07:38 2017 -0800
----------------------------------------------------------------------
ql/src/test/results/clientpositive/interval_arithmetic.q.out | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
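The two-line change below only refreshes golden output that depends on the date the test runs: the plan folds current_date plus the day-to-second intervals into a constant, so regenerating the file a day later shifts the expected timestamp from 2017-02-19 03:04:00 to 2017-02-20 03:04:00. A sketch of that drift, assuming (the query text in the diff below is truncated, so this is inferred) that the intervals sum to 2 days 03:04:00; GoldenFileDrift and expected are hypothetical names:

import java.time.LocalDate;
import java.time.LocalDateTime;

public class GoldenFileDrift {
    // Assumed total of the query's day-to-second intervals (2 days 03:04:00);
    // the exact second interval is cut off in the archived diff.
    static LocalDateTime expected(LocalDate runDate) {
        return runDate.atStartOfDay().plusDays(2).plusHours(3).plusMinutes(4);
    }

    public static void main(String[] args) {
        System.out.println(expected(LocalDate.of(2017, 2, 17))); // 2017-02-19T03:04
        System.out.println(expected(LocalDate.of(2017, 2, 18))); // 2017-02-20T03:04
    }
}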
http://git-wip-us.apache.org/repos/asf/hive/blob/5c293716/ql/src/test/results/clientpositive/interval_arithmetic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/interval_arithmetic.q.out b/ql/src/test/results/clientpositive/interval_arithmetic.q.out
index c1fc738..039d3e1 100644
--- a/ql/src/test/results/clientpositive/interval_arithmetic.q.out
+++ b/ql/src/test/results/clientpositive/interval_arithmetic.q.out
@@ -624,7 +624,7 @@ STAGE PLANS:
alias: interval_arithmetic_1
Statistics: Num rows: 12288 Data size: 326837 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: 2017-02-19 03:04:00.0 (type: timestamp)
+ expressions: 2017-02-20 03:04:00.0 (type: timestamp)
outputColumnNames: _col0
Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE
Limit
@@ -640,7 +640,7 @@ POSTHOOK: query: select current_date + interval '1 2:02:00' day to second + inte
POSTHOOK: type: QUERY
POSTHOOK: Input: default@interval_arithmetic_1
#### A masked pattern was here ####
-2017-02-19 03:04:00
+2017-02-20 03:04:00
PREHOOK: query: drop table interval_arithmetic_1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@interval_arithmetic_1