Posted to commits@hive.apache.org by pr...@apache.org on 2015/05/12 03:24:25 UTC
[39/39] hive git commit: Merge from trunk to llap - 05/11/2015 (Prasanth Jayachandran)
Merge from trunk to llap - 05/11/2015 (Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e6b1556e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e6b1556e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e6b1556e
Branch: refs/heads/llap
Commit: e6b1556e39f81dc2861f612733b2ba61c17ff698
Parents: dc7ceb4 433714f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Mon May 11 18:23:08 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Mon May 11 18:23:08 2015 -0700
----------------------------------------------------------------------
README.txt | 32 +-
RELEASE_NOTES.txt | 459 +
bin/ext/orcfiledump.sh | 9 +-
.../apache/hadoop/hive/common/FileUtils.java | 155 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 18 +-
data/files/tjoin1.txt | 3 +
data/files/tjoin2.txt | 4 +
.../hive/hcatalog/common/HiveClientCache.java | 9 +-
.../hcatalog/mapreduce/TestPassProperties.java | 5 +-
.../templeton/tool/TempletonControllerJob.java | 7 +-
.../test/resources/testconfiguration.properties | 5 +-
.../upgrade/mysql/hive-schema-1.2.0.mysql.sql | 2 +-
.../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 2 +-
.../hive/metastore/AggregateStatsCache.java | 33 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 27 +-
.../hive/metastore/MetaStoreDirectSql.java | 24 +-
.../hive/metastore/RetryingMetaStoreClient.java | 60 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 35 +-
.../hive/metastore/TestHiveMetastoreCli.java | 63 +
.../hive/metastore/txn/TestTxnHandler.java | 39 +-
.../java/org/apache/hadoop/hive/ql/Context.java | 10 +-
.../hive/ql/exec/CommonMergeJoinOperator.java | 54 +-
.../hadoop/hive/ql/exec/MapJoinOperator.java | 1 -
.../hadoop/hive/ql/exec/ReduceSinkOperator.java | 3 +-
.../apache/hadoop/hive/ql/exec/Registry.java | 29 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 5 +
.../persistence/HybridHashTableContainer.java | 2 +-
.../exec/vector/VectorMapJoinBaseOperator.java | 185 +
.../ql/exec/vector/VectorMapJoinOperator.java | 132 +-
.../VectorMapJoinOuterFilteredOperator.java | 122 +
.../mapjoin/VectorMapJoinCommonOperator.java | 5 +-
.../VectorMapJoinGenerateResultOperator.java | 5 +
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 16 +-
.../ql/io/orc/ConversionTreeReaderFactory.java | 38 +
.../apache/hadoop/hive/ql/io/orc/FileDump.java | 91 +-
.../hadoop/hive/ql/io/orc/JsonFileDump.java | 365 +
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 8 +-
.../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 24 +-
.../hive/ql/io/orc/RecordReaderFactory.java | 269 +
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 +-
.../ql/lockmgr/zookeeper/ZooKeeperHiveLock.java | 22 +
.../optimizer/ConstantPropagateProcFactory.java | 83 +-
.../ql/optimizer/IdentityProjectRemover.java | 12 +
.../ql/optimizer/NonBlockingOpDeDupProc.java | 11 +
.../hadoop/hive/ql/optimizer/Optimizer.java | 8 +-
.../ql/optimizer/calcite/cost/HiveCost.java | 16 +-
.../rules/HiveExpandDistinctAggregatesRule.java | 278 +
.../translator/HiveOpConverterPostProc.java | 10 +
.../hive/ql/optimizer/physical/Vectorizer.java | 23 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 14 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 15 +-
.../apache/hadoop/hive/ql/plan/PlanUtils.java | 4 +
.../StorageBasedAuthorizationProvider.java | 114 +-
.../hadoop/hive/ql/session/SessionState.java | 4 +-
.../hadoop/hive/ql/txn/compactor/Cleaner.java | 20 +-
.../hive/ql/txn/compactor/CompactorThread.java | 12 +-
.../hadoop/hive/ql/txn/compactor/Initiator.java | 11 +-
.../hadoop/hive/ql/txn/compactor/Worker.java | 12 +
.../hadoop/hive/ql/io/orc/TestJsonFileDump.java | 138 +
.../hadoop/hive/ql/plan/TestViewEntity.java | 108 +
.../hive/ql/txn/compactor/TestCleaner.java | 56 +-
.../hive/ql/txn/compactor/TestInitiator.java | 63 +-
.../hive/ql/txn/compactor/TestWorker.java | 45 +
.../test/queries/clientpositive/bucket_many.q | 16 +
.../test/queries/clientpositive/explainuser_2.q | 1 +
.../extrapolate_part_stats_partial.q | 2 +
.../extrapolate_part_stats_partial_ndv.q | 2 +
ql/src/test/queries/clientpositive/fold_case.q | 12 +
ql/src/test/queries/clientpositive/fold_when.q | 31 +
.../queries/clientpositive/mapjoin_mapjoin.q | 1 +
ql/src/test/queries/clientpositive/mergejoin.q | 17 +
.../clientpositive/orc_int_type_promotion.q | 79 +
.../clientpositive/vector_left_outer_join2.q | 62 +
.../clientpositive/vector_leftsemi_mapjoin.q | 403 +
ql/src/test/resources/orc-file-dump.json | 1354 ++
.../annotate_stats_join_pkfk.q.out | 20 +-
.../results/clientpositive/bucket_many.q.out | 230 +
.../encryption_insert_partition_static.q.out | 14 +-
.../test/results/clientpositive/fold_case.q.out | 301 +
.../test/results/clientpositive/fold_when.q.out | 480 +
ql/src/test/results/clientpositive/join32.q.out | 84 +-
.../clientpositive/join32_lessSize.q.out | 423 +-
ql/src/test/results/clientpositive/join33.q.out | 84 +-
.../clientpositive/join_alt_syntax.q.out | 306 +-
.../clientpositive/join_cond_pushdown_2.q.out | 150 +-
.../clientpositive/join_cond_pushdown_4.q.out | 150 +-
.../test/results/clientpositive/mergejoin.q.out | 844 +-
.../clientpositive/orc_int_type_promotion.q.out | 377 +
.../ql_rewrite_gbtoidx_cbo_2.q.out | 14 +-
.../results/clientpositive/spark/cbo_gby.q.out | 4 +-
.../clientpositive/spark/cbo_udf_udaf.q.out | 2 +-
...pby_complex_types_multi_single_reducer.q.out | 38 +-
.../results/clientpositive/spark/join32.q.out | 88 +-
.../clientpositive/spark/join32_lessSize.q.out | 286 +-
.../results/clientpositive/spark/join33.q.out | 88 +-
.../clientpositive/spark/join_alt_syntax.q.out | 210 +-
.../spark/join_cond_pushdown_2.q.out | 98 +-
.../spark/join_cond_pushdown_4.q.out | 98 +-
.../spark/lateral_view_explode2.q.out | 4 +-
.../clientpositive/spark/union_remove_25.q.out | 2 +-
.../clientpositive/spark/union_top_level.q.out | 16 +-
.../spark/vector_cast_constant.q.java1.7.out | 16 +-
.../spark/vector_cast_constant.q.java1.8.out | 16 +-
.../spark/vectorized_timestamp_funcs.q.out | 4 +-
.../clientpositive/tez/auto_join29.q.out | 500 +
.../clientpositive/tez/explainuser_2.q.out | 1529 +-
.../clientpositive/tez/limit_pushdown.q.out | 31 +-
.../results/clientpositive/tez/mergejoin.q.out | 844 +-
.../test/results/clientpositive/tez/mrr.q.out | 48 +-
.../tez/vector_count_distinct.q.out | 28 +-
.../tez/vector_left_outer_join2.q.out | 553 +
.../tez/vector_leftsemi_mapjoin.q.out | 13807 +++++++++++++++++
.../tez/vectorization_limit.q.out | 31 +-
.../tez/vectorized_distinct_gby.q.out | 51 +-
.../vector_left_outer_join2.q.out | 568 +
.../vector_leftsemi_mapjoin.q.out | 13572 ++++++++++++++++
.../apache/hive/service/cli/ColumnValue.java | 2 +
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 29 +-
.../org/apache/hadoop/fs/DefaultFileAccess.java | 65 +-
.../apache/hadoop/hive/shims/HadoopShims.java | 24 +-
.../hadoop/hive/shims/HadoopShimsSecure.java | 8 -
testutils/metastore/execute-test-on-lxc.sh | 7 +-
.../org/apache/hive/ptest/execution/PTest.java | 12 +-
.../ptest/execution/conf/TestConfiguration.java | 12 +-
.../execution/conf/TestTestConfiguration.java | 38 +-
.../resources/test-configuration.properties | 2 +
126 files changed, 38732 insertions(+), 2892 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --cc common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index bfc5172,eff4d30..f4a70b2
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@@ -1020,10 -1021,8 +1021,10 @@@ public class HiveConf extends Configura
HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false,
"If turned on splits generated by orc will include metadata about the stripes in the file. This\n" +
"data is read remotely (from the client or HS2 machine) and sent to all the tasks."),
+ HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS("hive.orc.splits.include.fileid", true,
+ "Include file ID in splits on file systems thaty support it."),
HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000,
- "Cache size for keeping meta info about orc splits cached in the client."),
+ "Max cache size for keeping meta info about orc splits cached in the client."),
HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10,
"How many threads orc should use to create splits in parallel."),
HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false,
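
For context, a minimal sketch of reading the two settings touched above through the HiveConf typed accessors; this is not part of the commit, and the ConfVars names are taken directly from the hunk:

import org.apache.hadoop.hive.conf.HiveConf;

public class OrcSplitConfSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // New in this merge: include file IDs in ORC splits where the file system supports it.
    boolean includeFileId =
        conf.getBoolVar(HiveConf.ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
    // Re-documented as a maximum: client-side cache of ORC split stripe metadata.
    int stripeDetailsCacheSize =
        conf.getIntVar(HiveConf.ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE);
    System.out.println("include fileid: " + includeFileId
        + ", stripe details cache max: " + stripeDetailsCacheSize);
  }
}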
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
index 0000000,c33004e..a438855
mode 000000,100644..100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
@@@ -1,0 -1,365 +1,365 @@@
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.apache.hadoop.hive.ql.io.orc;
+
+ import java.io.IOException;
+ import java.util.List;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
+ import org.codehaus.jettison.json.JSONException;
+ import org.codehaus.jettison.json.JSONObject;
+ import org.codehaus.jettison.json.JSONArray;
+ import org.codehaus.jettison.json.JSONStringer;
+ import org.codehaus.jettison.json.JSONWriter;
+
+ /**
+ * File dump tool with json formatted output.
+ */
+ public class JsonFileDump {
+
+ public static void printJsonMetaData(List<String> files, Configuration conf,
+ List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone) throws JSONException, IOException {
+ JSONStringer writer = new JSONStringer();
+ boolean multiFile = files.size() > 1;
+ if (multiFile) {
+ writer.array();
+ } else {
+ writer.object();
+ }
+ for (String filename : files) {
+ if (multiFile) {
+ writer.object();
+ }
+ writer.key("fileName").value(filename);
+ Path path = new Path(filename);
+ Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ writer.key("fileVersion").value(reader.getFileVersion().getName());
+ writer.key("writerVersion").value(reader.getWriterVersion());
+ RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+ writer.key("numberOfRows").value(reader.getNumberOfRows());
+ writer.key("compression").value(reader.getCompression());
+ if (reader.getCompression() != CompressionKind.NONE) {
+ writer.key("compressionBufferSize").value(reader.getCompressionSize());
+ }
+ writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
+ writer.key("schema").array();
+ writeSchema(writer, reader.getTypes());
+ writer.endArray();
+
+ writer.key("stripeStatistics").array();
- Metadata metadata = reader.getMetadata();
- for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
++ List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
++ for (int n = 0; n < stripeStatistics.size(); n++) {
+ writer.object();
+ writer.key("stripeNumber").value(n + 1);
- StripeStatistics ss = metadata.getStripeStatistics().get(n);
++ StripeStatistics ss = stripeStatistics.get(n);
+ writer.key("columnStatistics").array();
+ for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+ writer.endObject();
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+ writer.endArray();
+
+ ColumnStatistics[] stats = reader.getStatistics();
+ int colCount = stats.length;
+ writer.key("fileStatistics").array();
+ for (int i = 0; i < stats.length; ++i) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, stats[i]);
+ writer.endObject();
+ }
+ writer.endArray();
+
+ writer.key("stripes").array();
+ int stripeIx = -1;
+ for (StripeInformation stripe : reader.getStripes()) {
+ ++stripeIx;
+ long stripeStart = stripe.getOffset();
+ OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+ writer.object(); // start of stripe information
+ writer.key("stripeNumber").value(stripeIx + 1);
+ writer.key("stripeInformation");
+ writeStripeInformation(writer, stripe);
+ if (printTimeZone) {
+ writer.key("writerTimezone").value(
+ footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
+ }
+ long sectionStart = stripeStart;
+
+ writer.key("streams").array();
+ for (OrcProto.Stream section : footer.getStreamsList()) {
+ writer.object();
+ String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
+ writer.key("columnId").value(section.getColumn());
+ writer.key("section").value(kind);
+ writer.key("startOffset").value(sectionStart);
+ writer.key("length").value(section.getLength());
+ sectionStart += section.getLength();
+ writer.endObject();
+ }
+ writer.endArray();
+
+ writer.key("encodings").array();
+ for (int i = 0; i < footer.getColumnsCount(); ++i) {
+ writer.object();
+ OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+ writer.key("columnId").value(i);
+ writer.key("kind").value(encoding.getKind());
+ if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+ encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+ writer.key("dictionarySize").value(encoding.getDictionarySize());
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+
+ if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+ // read the row index only for the specified columns; the bloom filter
+ // is read only if those columns are included
+ boolean[] sargColumns = new boolean[colCount];
+ for (int colIdx : rowIndexCols) {
+ sargColumns[colIdx] = true;
+ }
+ RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
+ writer.key("indexes").array();
+ for (int col : rowIndexCols) {
+ writer.object();
+ writer.key("columnId").value(col);
+ writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+ writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ writer.endObject(); // end of stripe information
+ }
+ writer.endArray();
+
+ FileSystem fs = path.getFileSystem(conf);
+ long fileLen = fs.getContentSummary(path).getLength();
+ long paddedBytes = FileDump.getTotalPaddingSize(reader);
+ // an empty ORC file is ~45 bytes, so the file length is assumed to always be > 0
+ double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+ writer.key("fileLength").value(fileLen);
+ writer.key("paddingLength").value(paddedBytes);
+ writer.key("paddingRatio").value(percentPadding);
+ rows.close();
+
+ writer.endObject();
+ }
+ if (multiFile) {
+ writer.endArray();
+ }
+
+ if (prettyPrint) {
+ final String prettyJson;
+ if (multiFile) {
+ JSONArray jsonArray = new JSONArray(writer.toString());
+ prettyJson = jsonArray.toString(2);
+ } else {
+ JSONObject jsonObject = new JSONObject(writer.toString());
+ prettyJson = jsonObject.toString(2);
+ }
+ System.out.println(prettyJson);
+ } else {
+ System.out.println(writer.toString());
+ }
+ }
+
+ private static void writeSchema(JSONStringer writer, List<OrcProto.Type> types)
+ throws JSONException {
+ int i = 0;
+ for(OrcProto.Type type : types) {
+ writer.object();
+ writer.key("columnId").value(i++);
+ writer.key("columnType").value(type.getKind());
+ if (type.getFieldNamesCount() > 0) {
+ writer.key("childColumnNames").array();
+ for (String field : type.getFieldNamesList()) {
+ writer.value(field);
+ }
+ writer.endArray();
+ writer.key("childColumnIds").array();
+ for (Integer colId : type.getSubtypesList()) {
+ writer.value(colId);
+ }
+ writer.endArray();
+ }
+ if (type.hasPrecision()) {
+ writer.key("precision").value(type.getPrecision());
+ }
+
+ if (type.hasScale()) {
+ writer.key("scale").value(type.getScale());
+ }
+
+ if (type.hasMaximumLength()) {
+ writer.key("maxLength").value(type.getMaximumLength());
+ }
+ writer.endObject();
+ }
+ }
+
+ private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
+ throws JSONException {
+ writer.object();
+ writer.key("offset").value(stripe.getOffset());
+ writer.key("indexLength").value(stripe.getIndexLength());
+ writer.key("dataLength").value(stripe.getDataLength());
+ writer.key("footerLength").value(stripe.getFooterLength());
+ writer.key("rowCount").value(stripe.getNumberOfRows());
+ writer.endObject();
+ }
+
+ private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs)
+ throws JSONException {
+ if (cs != null) {
+ writer.key("count").value(cs.getNumberOfValues());
+ writer.key("hasNull").value(cs.hasNull());
+ if (cs instanceof BinaryColumnStatistics) {
+ writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.BINARY);
+ } else if (cs instanceof BooleanColumnStatistics) {
+ writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount());
+ writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount());
+ writer.key("type").value(OrcProto.Type.Kind.BOOLEAN);
+ } else if (cs instanceof IntegerColumnStatistics) {
+ writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum());
+ if (((IntegerColumnStatistics) cs).isSumDefined()) {
+ writer.key("sum").value(((IntegerColumnStatistics) cs).getSum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.LONG);
+ } else if (cs instanceof DoubleColumnStatistics) {
+ writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum());
+ writer.key("sum").value(((DoubleColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.DOUBLE);
+ } else if (cs instanceof StringColumnStatistics) {
+ writer.key("min").value(((StringColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((StringColumnStatistics) cs).getMaximum());
+ writer.key("totalLength").value(((StringColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.STRING);
+ } else if (cs instanceof DateColumnStatistics) {
+ if (((DateColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((DateColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DateColumnStatistics) cs).getMaximum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.DATE);
+ } else if (cs instanceof TimestampColumnStatistics) {
+ if (((TimestampColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((TimestampColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((TimestampColumnStatistics) cs).getMaximum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.TIMESTAMP);
+ } else if (cs instanceof DecimalColumnStatistics) {
+ if (((DecimalColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((DecimalColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DecimalColumnStatistics) cs).getMaximum());
+ writer.key("sum").value(((DecimalColumnStatistics) cs).getSum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.DECIMAL);
+ }
+ }
+ }
+
+ private static void writeBloomFilterIndexes(JSONWriter writer, int col,
+ OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
+
+ BloomFilterIO stripeLevelBF = null;
+ if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
+ int entryIx = 0;
+ writer.key("bloomFilterIndexes").array();
+ for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
+ writer.object();
+ writer.key("entryId").value(entryIx++);
+ BloomFilterIO toMerge = new BloomFilterIO(bf);
+ writeBloomFilterStats(writer, toMerge);
+ if (stripeLevelBF == null) {
+ stripeLevelBF = toMerge;
+ } else {
+ stripeLevelBF.merge(toMerge);
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ if (stripeLevelBF != null) {
+ writer.key("stripeLevelBloomFilter");
+ writer.object();
+ writeBloomFilterStats(writer, stripeLevelBF);
+ writer.endObject();
+ }
+ }
+
+ private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
+ throws JSONException {
+ int bitCount = bf.getBitSize();
+ int popCount = 0;
+ for (long l : bf.getBitSet()) {
+ popCount += Long.bitCount(l);
+ }
+ int k = bf.getNumHashFunctions();
+ float loadFactor = (float) popCount / (float) bitCount;
+ float expectedFpp = (float) Math.pow(loadFactor, k);
+ writer.key("numHashFunctions").value(k);
+ writer.key("bitCount").value(bitCount);
+ writer.key("popCount").value(popCount);
+ writer.key("loadFactor").value(loadFactor);
+ writer.key("expectedFpp").value(expectedFpp);
+ }
+
+ private static void writeRowGroupIndexes(JSONWriter writer, int col,
+ OrcProto.RowIndex[] rowGroupIndex)
+ throws JSONException {
+
+ OrcProto.RowIndex index;
+ if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
+ ((index = rowGroupIndex[col]) == null)) {
+ return;
+ }
+
+ writer.key("rowGroupIndexes").array();
+ for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
+ writer.object();
+ writer.key("entryId").value(entryIx);
+ OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
+ if (entry == null) {
+ continue;
+ }
+ OrcProto.ColumnStatistics colStats = entry.getStatistics();
+ writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
+ writer.key("positions").array();
+ for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
+ writer.value(entry.getPositions(posIx));
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+
+ }
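
A minimal usage sketch for the new JsonFileDump above; it is not part of the commit and uses only the printJsonMetaData signature shown in the diff (the ORC path is hypothetical):

import java.util.Arrays;
import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.JsonFileDump;

public class JsonFileDumpSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Dump metadata for one ORC file as pretty-printed JSON, including writer
    // time zones and row-group/bloom-filter index details for column 1.
    JsonFileDump.printJsonMetaData(
        Arrays.asList("/tmp/example.orc"), // hypothetical file
        conf,
        Collections.singletonList(1),      // rowIndexCols
        true,                              // prettyPrint
        true);                             // printTimeZone
  }
}

The bin/ext/orcfiledump.sh change in the diffstat suggests this path is also reachable through the orcfiledump CLI wrapper.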
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/test/results/clientpositive/tez/mrr.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/tez/mrr.q.out
index 8101f3b,d90b27f..1713783
--- a/ql/src/test/results/clientpositive/tez/mrr.q.out
+++ b/ql/src/test/results/clientpositive/tez/mrr.q.out
@@@ -469,19 -468,26 +467,25 @@@ STAGE PLANS
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: bigint), _col0 (type: string)
- sort order: ++
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
- key expressions: _col1 (type: bigint)
- sort order: +
++ key expressions: _col1 (type: bigint), _col0 (type: string)
++ sort order: ++
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
Reducer 4
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@@ -501,734 -507,739 +505,738 @@@ POSTHOOK: query: SELECT s2.key, count(d
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-98 1
-97 1
-96 1
-95 1
-92 1
-90 1
-9 1
-87 1
-86 1
-85 1
-84 1
-83 1
-82 1
-80 1
-8 1
-78 1
-77 1
-76 1
-74 1
-72 1
-70 1
-69 1
-67 1
-66 1
-65 1
-64 1
-58 1
-57 1
-54 1
-53 1
-51 1
-5 1
-498 1
-497 1
-496 1
-495 1
-494 1
-493 1
-492 1
-491 1
-490 1
-489 1
-487 1
-485 1
-484 1
-483 1
-482 1
-481 1
-480 1
-479 1
-478 1
-477 1
-475 1
-472 1
-470 1
-47 1
-469 1
-468 1
-467 1
-466 1
-463 1
-462 1
-460 1
-459 1
-458 1
-457 1
-455 1
-454 1
-453 1
-452 1
-449 1
-448 1
-446 1
-444 1
-443 1
-44 1
-439 1
-438 1
-437 1
-436 1
-435 1
-432 1
-431 1
-430 1
-43 1
-429 1
-427 1
-424 1
-421 1
-42 1
-419 1
-418 1
-417 1
-414 1
-413 1
-411 1
-41 1
-409 1
-407 1
-406 1
-404 1
-403 1
-402 1
-401 1
-400 1
-4 1
-399 1
-397 1
-396 1
-395 1
-394 1
-393 1
-392 1
-389 1
-386 1
-384 1
-382 1
-379 1
-378 1
-377 1
-375 1
-374 1
-373 1
-37 1
-369 1
-368 1
-367 1
-366 1
-365 1
-364 1
-362 1
-360 1
-356 1
-353 1
-351 1
-35 1
-348 1
-345 1
-344 1
-342 1
-341 1
-34 1
-339 1
-338 1
-336 1
-335 1
-333 1
-332 1
-331 1
-33 1
-327 1
-325 1
-323 1
-322 1
-321 1
-318 1
-317 1
-316 1
-315 1
-311 1
-310 1
-309 1
-308 1
-307 1
-306 1
-305 1
-302 1
-30 1
-298 1
-296 1
-292 1
-291 1
-289 1
-288 1
-287 1
-286 1
-285 1
-284 1
-283 1
-282 1
-281 1
-280 1
-28 1
-278 1
-277 1
-275 1
-274 1
-273 1
-272 1
-27 1
-266 1
-265 1
-263 1
-262 1
-260 1
-26 1
-258 1
-257 1
-256 1
-255 1
-252 1
-249 1
-248 1
-247 1
-244 1
-242 1
-241 1
-24 1
-239 1
-238 1
-237 1
-235 1
-233 1
-230 1
-229 1
-228 1
-226 1
-224 1
-223 1
-222 1
-221 1
-219 1
-218 1
-217 1
-216 1
-214 1
-213 1
-209 1
-208 1
-207 1
-205 1
-203 1
-202 1
-201 1
-200 1
-20 1
-2 1
-199 1
-197 1
-196 1
-195 1
-194 1
-193 1
-192 1
-191 1
-190 1
-19 1
-189 1
-187 1
-186 1
-183 1
-181 1
-180 1
-18 1
-179 1
-178 1
-177 1
-176 1
-175 1
-174 1
-172 1
-170 1
-17 1
-169 1
-168 1
-167 1
-166 1
-165 1
-164 1
-163 1
-162 1
-160 1
-158 1
-157 1
-156 1
-155 1
-153 1
-152 1
-150 1
-15 1
-149 1
-146 1
-145 1
-143 1
-138 1
-137 1
-136 1
-134 1
-133 1
-131 1
-129 1
-128 1
-126 1
-125 1
-120 1
-12 1
-119 1
-118 1
-116 1
-114 1
-113 1
-111 1
-11 1
-105 1
-104 1
-103 1
-100 1
-10 1
0 1
-PREHOOK: query: -- same query with broadcast join
-EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-PREHOOK: type: QUERY
-POSTHOOK: query: -- same query with broadcast join
-EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- input vertices:
- 1 Map 4
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: bigint)
- sort order: +
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Reducer 3
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-98 1
-97 1
-96 1
-95 1
-92 1
-90 1
-9 1
-87 1
-86 1
-85 1
-84 1
-83 1
-82 1
-80 1
-8 1
-78 1
-77 1
-76 1
-74 1
-72 1
-70 1
-69 1
-67 1
-66 1
-65 1
-64 1
-58 1
-57 1
-54 1
-53 1
-51 1
-5 1
-498 1
-497 1
-496 1
-495 1
-494 1
-493 1
-492 1
-491 1
-490 1
-489 1
-487 1
-485 1
-484 1
-483 1
-482 1
-481 1
-480 1
-479 1
-478 1
-477 1
-475 1
-472 1
-470 1
-47 1
-469 1
-468 1
-467 1
-466 1
-463 1
-462 1
-460 1
-459 1
-458 1
-457 1
-455 1
-454 1
-453 1
-452 1
-449 1
-448 1
-446 1
-444 1
-443 1
-44 1
-439 1
-438 1
-437 1
-436 1
-435 1
-432 1
-431 1
-430 1
-43 1
-429 1
-427 1
-424 1
-421 1
-42 1
-419 1
-418 1
-417 1
-414 1
-413 1
-411 1
-41 1
-409 1
-407 1
-406 1
-404 1
-403 1
-402 1
-401 1
-400 1
-4 1
-399 1
-397 1
-396 1
-395 1
-394 1
-393 1
-392 1
-389 1
-386 1
-384 1
-382 1
-379 1
-378 1
-377 1
-375 1
-374 1
-373 1
-37 1
-369 1
-368 1
-367 1
-366 1
-365 1
-364 1
-362 1
-360 1
-356 1
-353 1
-351 1
-35 1
-348 1
-345 1
-344 1
-342 1
-341 1
-34 1
-339 1
-338 1
-336 1
-335 1
-333 1
-332 1
-331 1
-33 1
-327 1
-325 1
-323 1
-322 1
-321 1
-318 1
-317 1
-316 1
-315 1
-311 1
-310 1
-309 1
-308 1
-307 1
-306 1
-305 1
-302 1
-30 1
-298 1
-296 1
-292 1
-291 1
-289 1
-288 1
-287 1
-286 1
-285 1
-284 1
-283 1
-282 1
-281 1
-280 1
-28 1
-278 1
-277 1
-275 1
-274 1
-273 1
-272 1
-27 1
-266 1
-265 1
-263 1
-262 1
-260 1
-26 1
-258 1
-257 1
-256 1
-255 1
-252 1
-249 1
-248 1
-247 1
-244 1
-242 1
-241 1
-24 1
-239 1
-238 1
-237 1
-235 1
-233 1
-230 1
-229 1
-228 1
-226 1
-224 1
-223 1
-222 1
-221 1
-219 1
-218 1
-217 1
-216 1
-214 1
-213 1
-209 1
-208 1
-207 1
-205 1
-203 1
-202 1
-201 1
-200 1
-20 1
-2 1
-199 1
-197 1
-196 1
-195 1
-194 1
-193 1
-192 1
-191 1
-190 1
-19 1
-189 1
-187 1
-186 1
-183 1
-181 1
-180 1
-18 1
-179 1
-178 1
-177 1
-176 1
-175 1
-174 1
-172 1
-170 1
-17 1
-169 1
-168 1
-167 1
-166 1
-165 1
-164 1
-163 1
-162 1
-160 1
-158 1
-157 1
-156 1
-155 1
-153 1
-152 1
-150 1
-15 1
-149 1
-146 1
-145 1
-143 1
-138 1
-137 1
-136 1
-134 1
-133 1
-131 1
-129 1
-128 1
-126 1
-125 1
-120 1
-12 1
-119 1
-118 1
-116 1
-114 1
-113 1
-111 1
-11 1
-105 1
-104 1
-103 1
-100 1
10 1
-0 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1
+PREHOOK: query: -- same query with broadcast join
+EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- same query with broadcast join
+EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
- aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
- outputColumnNames: _col0, _col1, _col2
++ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
- aggregations: count(DISTINCT KEY._col1:0._col0)
- keys: KEY._col0 (type: string)
++ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: bigint), _col0 (type: string)
- sort order: ++
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
++ Group By Operator
++ aggregations: count(_col1)
++ keys: _col0 (type: string)
++ mode: complete
++ outputColumnNames: _col0, _col1
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
++ Reduce Output Operator
++ key expressions: _col1 (type: bigint), _col0 (type: string)
++ sort order: ++
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1
+10 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1
PREHOOK: query: -- query with multiple branches in the task dag
EXPLAIN
SELECT *
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
----------------------------------------------------------------------