Posted to commits@hive.apache.org by pr...@apache.org on 2015/05/12 03:24:25 UTC
[39/39] hive git commit: Merge from trunk to llap - 05/11/2015 (Prasanth Jayachandran)
Merge from trunk to llap - 05/11/2015 (Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e6b1556e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e6b1556e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e6b1556e
Branch: refs/heads/llap
Commit: e6b1556e39f81dc2861f612733b2ba61c17ff698
Parents: dc7ceb4 433714f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Mon May 11 18:23:08 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Mon May 11 18:23:08 2015 -0700
----------------------------------------------------------------------
README.txt | 32 +-
RELEASE_NOTES.txt | 459 +
bin/ext/orcfiledump.sh | 9 +-
.../apache/hadoop/hive/common/FileUtils.java | 155 +-
.../org/apache/hadoop/hive/conf/HiveConf.java | 18 +-
data/files/tjoin1.txt | 3 +
data/files/tjoin2.txt | 4 +
.../hive/hcatalog/common/HiveClientCache.java | 9 +-
.../hcatalog/mapreduce/TestPassProperties.java | 5 +-
.../templeton/tool/TempletonControllerJob.java | 7 +-
.../test/resources/testconfiguration.properties | 5 +-
.../upgrade/mysql/hive-schema-1.2.0.mysql.sql | 2 +-
.../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 2 +-
.../hive/metastore/AggregateStatsCache.java | 33 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 27 +-
.../hive/metastore/MetaStoreDirectSql.java | 24 +-
.../hive/metastore/RetryingMetaStoreClient.java | 60 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 35 +-
.../hive/metastore/TestHiveMetastoreCli.java | 63 +
.../hive/metastore/txn/TestTxnHandler.java | 39 +-
.../java/org/apache/hadoop/hive/ql/Context.java | 10 +-
.../hive/ql/exec/CommonMergeJoinOperator.java | 54 +-
.../hadoop/hive/ql/exec/MapJoinOperator.java | 1 -
.../hadoop/hive/ql/exec/ReduceSinkOperator.java | 3 +-
.../apache/hadoop/hive/ql/exec/Registry.java | 29 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 5 +
.../persistence/HybridHashTableContainer.java | 2 +-
.../exec/vector/VectorMapJoinBaseOperator.java | 185 +
.../ql/exec/vector/VectorMapJoinOperator.java | 132 +-
.../VectorMapJoinOuterFilteredOperator.java | 122 +
.../mapjoin/VectorMapJoinCommonOperator.java | 5 +-
.../VectorMapJoinGenerateResultOperator.java | 5 +
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 16 +-
.../ql/io/orc/ConversionTreeReaderFactory.java | 38 +
.../apache/hadoop/hive/ql/io/orc/FileDump.java | 91 +-
.../hadoop/hive/ql/io/orc/JsonFileDump.java | 365 +
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 8 +-
.../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 24 +-
.../hive/ql/io/orc/RecordReaderFactory.java | 269 +
.../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 2 +-
.../ql/lockmgr/zookeeper/ZooKeeperHiveLock.java | 22 +
.../optimizer/ConstantPropagateProcFactory.java | 83 +-
.../ql/optimizer/IdentityProjectRemover.java | 12 +
.../ql/optimizer/NonBlockingOpDeDupProc.java | 11 +
.../hadoop/hive/ql/optimizer/Optimizer.java | 8 +-
.../ql/optimizer/calcite/cost/HiveCost.java | 16 +-
.../rules/HiveExpandDistinctAggregatesRule.java | 278 +
.../translator/HiveOpConverterPostProc.java | 10 +
.../hive/ql/optimizer/physical/Vectorizer.java | 23 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 14 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 15 +-
.../apache/hadoop/hive/ql/plan/PlanUtils.java | 4 +
.../StorageBasedAuthorizationProvider.java | 114 +-
.../hadoop/hive/ql/session/SessionState.java | 4 +-
.../hadoop/hive/ql/txn/compactor/Cleaner.java | 20 +-
.../hive/ql/txn/compactor/CompactorThread.java | 12 +-
.../hadoop/hive/ql/txn/compactor/Initiator.java | 11 +-
.../hadoop/hive/ql/txn/compactor/Worker.java | 12 +
.../hadoop/hive/ql/io/orc/TestJsonFileDump.java | 138 +
.../hadoop/hive/ql/plan/TestViewEntity.java | 108 +
.../hive/ql/txn/compactor/TestCleaner.java | 56 +-
.../hive/ql/txn/compactor/TestInitiator.java | 63 +-
.../hive/ql/txn/compactor/TestWorker.java | 45 +
.../test/queries/clientpositive/bucket_many.q | 16 +
.../test/queries/clientpositive/explainuser_2.q | 1 +
.../extrapolate_part_stats_partial.q | 2 +
.../extrapolate_part_stats_partial_ndv.q | 2 +
ql/src/test/queries/clientpositive/fold_case.q | 12 +
ql/src/test/queries/clientpositive/fold_when.q | 31 +
.../queries/clientpositive/mapjoin_mapjoin.q | 1 +
ql/src/test/queries/clientpositive/mergejoin.q | 17 +
.../clientpositive/orc_int_type_promotion.q | 79 +
.../clientpositive/vector_left_outer_join2.q | 62 +
.../clientpositive/vector_leftsemi_mapjoin.q | 403 +
ql/src/test/resources/orc-file-dump.json | 1354 ++
.../annotate_stats_join_pkfk.q.out | 20 +-
.../results/clientpositive/bucket_many.q.out | 230 +
.../encryption_insert_partition_static.q.out | 14 +-
.../test/results/clientpositive/fold_case.q.out | 301 +
.../test/results/clientpositive/fold_when.q.out | 480 +
ql/src/test/results/clientpositive/join32.q.out | 84 +-
.../clientpositive/join32_lessSize.q.out | 423 +-
ql/src/test/results/clientpositive/join33.q.out | 84 +-
.../clientpositive/join_alt_syntax.q.out | 306 +-
.../clientpositive/join_cond_pushdown_2.q.out | 150 +-
.../clientpositive/join_cond_pushdown_4.q.out | 150 +-
.../test/results/clientpositive/mergejoin.q.out | 844 +-
.../clientpositive/orc_int_type_promotion.q.out | 377 +
.../ql_rewrite_gbtoidx_cbo_2.q.out | 14 +-
.../results/clientpositive/spark/cbo_gby.q.out | 4 +-
.../clientpositive/spark/cbo_udf_udaf.q.out | 2 +-
...pby_complex_types_multi_single_reducer.q.out | 38 +-
.../results/clientpositive/spark/join32.q.out | 88 +-
.../clientpositive/spark/join32_lessSize.q.out | 286 +-
.../results/clientpositive/spark/join33.q.out | 88 +-
.../clientpositive/spark/join_alt_syntax.q.out | 210 +-
.../spark/join_cond_pushdown_2.q.out | 98 +-
.../spark/join_cond_pushdown_4.q.out | 98 +-
.../spark/lateral_view_explode2.q.out | 4 +-
.../clientpositive/spark/union_remove_25.q.out | 2 +-
.../clientpositive/spark/union_top_level.q.out | 16 +-
.../spark/vector_cast_constant.q.java1.7.out | 16 +-
.../spark/vector_cast_constant.q.java1.8.out | 16 +-
.../spark/vectorized_timestamp_funcs.q.out | 4 +-
.../clientpositive/tez/auto_join29.q.out | 500 +
.../clientpositive/tez/explainuser_2.q.out | 1529 +-
.../clientpositive/tez/limit_pushdown.q.out | 31 +-
.../results/clientpositive/tez/mergejoin.q.out | 844 +-
.../test/results/clientpositive/tez/mrr.q.out | 48 +-
.../tez/vector_count_distinct.q.out | 28 +-
.../tez/vector_left_outer_join2.q.out | 553 +
.../tez/vector_leftsemi_mapjoin.q.out | 13807 +++++++++++++++++
.../tez/vectorization_limit.q.out | 31 +-
.../tez/vectorized_distinct_gby.q.out | 51 +-
.../vector_left_outer_join2.q.out | 568 +
.../vector_leftsemi_mapjoin.q.out | 13572 ++++++++++++++++
.../apache/hive/service/cli/ColumnValue.java | 2 +
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 29 +-
.../org/apache/hadoop/fs/DefaultFileAccess.java | 65 +-
.../apache/hadoop/hive/shims/HadoopShims.java | 24 +-
.../hadoop/hive/shims/HadoopShimsSecure.java | 8 -
testutils/metastore/execute-test-on-lxc.sh | 7 +-
.../org/apache/hive/ptest/execution/PTest.java | 12 +-
.../ptest/execution/conf/TestConfiguration.java | 12 +-
.../execution/conf/TestTestConfiguration.java | 38 +-
.../resources/test-configuration.properties | 2 +
126 files changed, 38732 insertions(+), 2892 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --cc common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index bfc5172,eff4d30..f4a70b2
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@@ -1020,10 -1021,8 +1021,10 @@@ public class HiveConf extends Configura
HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false,
"If turned on splits generated by orc will include metadata about the stripes in the file. This\n" +
"data is read remotely (from the client or HS2 machine) and sent to all the tasks."),
+ HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS("hive.orc.splits.include.fileid", true,
+ "Include file ID in splits on file systems thaty support it."),
HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000,
- "Cache size for keeping meta info about orc splits cached in the client."),
+ "Max cache size for keeping meta info about orc splits cached in the client."),
HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10,
"How many threads orc should use to create splits in parallel."),
HIVE_ORC_SKIP_CORRUPT_DATA("hive.exec.orc.skip.corrupt.data", false,
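
For context, a minimal sketch of reading the two settings touched above through the HiveConf typed accessors; this is not part of the commit, and the ConfVars names are taken directly from the hunk:

import org.apache.hadoop.hive.conf.HiveConf;

public class OrcSplitConfSketch {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // New in this merge: include file IDs in ORC splits where the file system supports it.
    boolean includeFileId =
        conf.getBoolVar(HiveConf.ConfVars.HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS);
    // Re-documented as a maximum: client-side cache of ORC split stripe metadata.
    int stripeDetailsCacheSize =
        conf.getIntVar(HiveConf.ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE);
    System.out.println("include fileid: " + includeFileId
        + ", stripe details cache max: " + stripeDetailsCacheSize);
  }
}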
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
index 0000000,c33004e..a438855
mode 000000,100644..100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/JsonFileDump.java
@@@ -1,0 -1,365 +1,365 @@@
+ /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.apache.hadoop.hive.ql.io.orc;
+
+ import java.io.IOException;
+ import java.util.List;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
+ import org.codehaus.jettison.json.JSONException;
+ import org.codehaus.jettison.json.JSONObject;
+ import org.codehaus.jettison.json.JSONArray;
+ import org.codehaus.jettison.json.JSONStringer;
+ import org.codehaus.jettison.json.JSONWriter;
+
+ /**
+ * File dump tool with json formatted output.
+ */
+ public class JsonFileDump {
+
+ public static void printJsonMetaData(List<String> files, Configuration conf,
+ List<Integer> rowIndexCols, boolean prettyPrint, boolean printTimeZone) throws JSONException, IOException {
+ JSONStringer writer = new JSONStringer();
+ boolean multiFile = files.size() > 1;
+ if (multiFile) {
+ writer.array();
+ } else {
+ writer.object();
+ }
+ for (String filename : files) {
+ if (multiFile) {
+ writer.object();
+ }
+ writer.key("fileName").value(filename);
+ Path path = new Path(filename);
+ Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
+ writer.key("fileVersion").value(reader.getFileVersion().getName());
+ writer.key("writerVersion").value(reader.getWriterVersion());
+ RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+ writer.key("numberOfRows").value(reader.getNumberOfRows());
+ writer.key("compression").value(reader.getCompression());
+ if (reader.getCompression() != CompressionKind.NONE) {
+ writer.key("compressionBufferSize").value(reader.getCompressionSize());
+ }
+ writer.key("schemaString").value(reader.getObjectInspector().getTypeName());
+ writer.key("schema").array();
+ writeSchema(writer, reader.getTypes());
+ writer.endArray();
+
+ writer.key("stripeStatistics").array();
- Metadata metadata = reader.getMetadata();
- for (int n = 0; n < metadata.getStripeStatistics().size(); n++) {
++ List<StripeStatistics> stripeStatistics = reader.getStripeStatistics();
++ for (int n = 0; n < stripeStatistics.size(); n++) {
+ writer.object();
+ writer.key("stripeNumber").value(n + 1);
- StripeStatistics ss = metadata.getStripeStatistics().get(n);
++ StripeStatistics ss = stripeStatistics.get(n);
+ writer.key("columnStatistics").array();
+ for (int i = 0; i < ss.getColumnStatistics().length; i++) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, ss.getColumnStatistics()[i]);
+ writer.endObject();
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+ writer.endArray();
+
+ ColumnStatistics[] stats = reader.getStatistics();
+ int colCount = stats.length;
+ writer.key("fileStatistics").array();
+ for (int i = 0; i < stats.length; ++i) {
+ writer.object();
+ writer.key("columnId").value(i);
+ writeColumnStatistics(writer, stats[i]);
+ writer.endObject();
+ }
+ writer.endArray();
+
+ writer.key("stripes").array();
+ int stripeIx = -1;
+ for (StripeInformation stripe : reader.getStripes()) {
+ ++stripeIx;
+ long stripeStart = stripe.getOffset();
+ OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+ writer.object(); // start of stripe information
+ writer.key("stripeNumber").value(stripeIx + 1);
+ writer.key("stripeInformation");
+ writeStripeInformation(writer, stripe);
+ if (printTimeZone) {
+ writer.key("writerTimezone").value(
+ footer.hasWriterTimezone() ? footer.getWriterTimezone() : FileDump.UNKNOWN);
+ }
+ long sectionStart = stripeStart;
+
+ writer.key("streams").array();
+ for (OrcProto.Stream section : footer.getStreamsList()) {
+ writer.object();
+ String kind = section.hasKind() ? section.getKind().name() : FileDump.UNKNOWN;
+ writer.key("columnId").value(section.getColumn());
+ writer.key("section").value(kind);
+ writer.key("startOffset").value(sectionStart);
+ writer.key("length").value(section.getLength());
+ sectionStart += section.getLength();
+ writer.endObject();
+ }
+ writer.endArray();
+
+ writer.key("encodings").array();
+ for (int i = 0; i < footer.getColumnsCount(); ++i) {
+ writer.object();
+ OrcProto.ColumnEncoding encoding = footer.getColumns(i);
+ writer.key("columnId").value(i);
+ writer.key("kind").value(encoding.getKind());
+ if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY ||
+ encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) {
+ writer.key("dictionarySize").value(encoding.getDictionarySize());
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+
+ if (rowIndexCols != null && !rowIndexCols.isEmpty()) {
+ // read the row index only for the specified columns; the bloom filter
+ // is read only if those columns are included
+ boolean[] sargColumns = new boolean[colCount];
+ for (int colIdx : rowIndexCols) {
+ sargColumns[colIdx] = true;
+ }
+ RecordReaderImpl.Index indices = rows.readRowIndex(stripeIx, null, sargColumns);
+ writer.key("indexes").array();
+ for (int col : rowIndexCols) {
+ writer.object();
+ writer.key("columnId").value(col);
+ writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+ writeBloomFilterIndexes(writer, col, indices.getBloomFilterIndex());
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ writer.endObject(); // end of stripe information
+ }
+ writer.endArray();
+
+ FileSystem fs = path.getFileSystem(conf);
+ long fileLen = fs.getContentSummary(path).getLength();
+ long paddedBytes = FileDump.getTotalPaddingSize(reader);
+ // an empty ORC file is ~45 bytes, so the file length is assumed to always be > 0
+ double percentPadding = ((double) paddedBytes / (double) fileLen) * 100;
+ writer.key("fileLength").value(fileLen);
+ writer.key("paddingLength").value(paddedBytes);
+ writer.key("paddingRatio").value(percentPadding);
+ rows.close();
+
+ writer.endObject();
+ }
+ if (multiFile) {
+ writer.endArray();
+ }
+
+ if (prettyPrint) {
+ final String prettyJson;
+ if (multiFile) {
+ JSONArray jsonArray = new JSONArray(writer.toString());
+ prettyJson = jsonArray.toString(2);
+ } else {
+ JSONObject jsonObject = new JSONObject(writer.toString());
+ prettyJson = jsonObject.toString(2);
+ }
+ System.out.println(prettyJson);
+ } else {
+ System.out.println(writer.toString());
+ }
+ }
+
+ private static void writeSchema(JSONStringer writer, List<OrcProto.Type> types)
+ throws JSONException {
+ int i = 0;
+ for(OrcProto.Type type : types) {
+ writer.object();
+ writer.key("columnId").value(i++);
+ writer.key("columnType").value(type.getKind());
+ if (type.getFieldNamesCount() > 0) {
+ writer.key("childColumnNames").array();
+ for (String field : type.getFieldNamesList()) {
+ writer.value(field);
+ }
+ writer.endArray();
+ writer.key("childColumnIds").array();
+ for (Integer colId : type.getSubtypesList()) {
+ writer.value(colId);
+ }
+ writer.endArray();
+ }
+ if (type.hasPrecision()) {
+ writer.key("precision").value(type.getPrecision());
+ }
+
+ if (type.hasScale()) {
+ writer.key("scale").value(type.getScale());
+ }
+
+ if (type.hasMaximumLength()) {
+ writer.key("maxLength").value(type.getMaximumLength());
+ }
+ writer.endObject();
+ }
+ }
+
+ private static void writeStripeInformation(JSONWriter writer, StripeInformation stripe)
+ throws JSONException {
+ writer.object();
+ writer.key("offset").value(stripe.getOffset());
+ writer.key("indexLength").value(stripe.getIndexLength());
+ writer.key("dataLength").value(stripe.getDataLength());
+ writer.key("footerLength").value(stripe.getFooterLength());
+ writer.key("rowCount").value(stripe.getNumberOfRows());
+ writer.endObject();
+ }
+
+ private static void writeColumnStatistics(JSONWriter writer, ColumnStatistics cs)
+ throws JSONException {
+ if (cs != null) {
+ writer.key("count").value(cs.getNumberOfValues());
+ writer.key("hasNull").value(cs.hasNull());
+ if (cs instanceof BinaryColumnStatistics) {
+ writer.key("totalLength").value(((BinaryColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.BINARY);
+ } else if (cs instanceof BooleanColumnStatistics) {
+ writer.key("trueCount").value(((BooleanColumnStatistics) cs).getTrueCount());
+ writer.key("falseCount").value(((BooleanColumnStatistics) cs).getFalseCount());
+ writer.key("type").value(OrcProto.Type.Kind.BOOLEAN);
+ } else if (cs instanceof IntegerColumnStatistics) {
+ writer.key("min").value(((IntegerColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((IntegerColumnStatistics) cs).getMaximum());
+ if (((IntegerColumnStatistics) cs).isSumDefined()) {
+ writer.key("sum").value(((IntegerColumnStatistics) cs).getSum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.LONG);
+ } else if (cs instanceof DoubleColumnStatistics) {
+ writer.key("min").value(((DoubleColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DoubleColumnStatistics) cs).getMaximum());
+ writer.key("sum").value(((DoubleColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.DOUBLE);
+ } else if (cs instanceof StringColumnStatistics) {
+ writer.key("min").value(((StringColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((StringColumnStatistics) cs).getMaximum());
+ writer.key("totalLength").value(((StringColumnStatistics) cs).getSum());
+ writer.key("type").value(OrcProto.Type.Kind.STRING);
+ } else if (cs instanceof DateColumnStatistics) {
+ if (((DateColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((DateColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DateColumnStatistics) cs).getMaximum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.DATE);
+ } else if (cs instanceof TimestampColumnStatistics) {
+ if (((TimestampColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((TimestampColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((TimestampColumnStatistics) cs).getMaximum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.TIMESTAMP);
+ } else if (cs instanceof DecimalColumnStatistics) {
+ if (((DecimalColumnStatistics) cs).getMaximum() != null) {
+ writer.key("min").value(((DecimalColumnStatistics) cs).getMinimum());
+ writer.key("max").value(((DecimalColumnStatistics) cs).getMaximum());
+ writer.key("sum").value(((DecimalColumnStatistics) cs).getSum());
+ }
+ writer.key("type").value(OrcProto.Type.Kind.DECIMAL);
+ }
+ }
+ }
+
+ private static void writeBloomFilterIndexes(JSONWriter writer, int col,
+ OrcProto.BloomFilterIndex[] bloomFilterIndex) throws JSONException {
+
+ BloomFilterIO stripeLevelBF = null;
+ if (bloomFilterIndex != null && bloomFilterIndex[col] != null) {
+ int entryIx = 0;
+ writer.key("bloomFilterIndexes").array();
+ for (OrcProto.BloomFilter bf : bloomFilterIndex[col].getBloomFilterList()) {
+ writer.object();
+ writer.key("entryId").value(entryIx++);
+ BloomFilterIO toMerge = new BloomFilterIO(bf);
+ writeBloomFilterStats(writer, toMerge);
+ if (stripeLevelBF == null) {
+ stripeLevelBF = toMerge;
+ } else {
+ stripeLevelBF.merge(toMerge);
+ }
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+ if (stripeLevelBF != null) {
+ writer.key("stripeLevelBloomFilter");
+ writer.object();
+ writeBloomFilterStats(writer, stripeLevelBF);
+ writer.endObject();
+ }
+ }
+
+ private static void writeBloomFilterStats(JSONWriter writer, BloomFilterIO bf)
+ throws JSONException {
+ int bitCount = bf.getBitSize();
+ int popCount = 0;
+ for (long l : bf.getBitSet()) {
+ popCount += Long.bitCount(l);
+ }
+ int k = bf.getNumHashFunctions();
+ float loadFactor = (float) popCount / (float) bitCount;
+ float expectedFpp = (float) Math.pow(loadFactor, k);
+ writer.key("numHashFunctions").value(k);
+ writer.key("bitCount").value(bitCount);
+ writer.key("popCount").value(popCount);
+ writer.key("loadFactor").value(loadFactor);
+ writer.key("expectedFpp").value(expectedFpp);
+ }
+
+ private static void writeRowGroupIndexes(JSONWriter writer, int col,
+ OrcProto.RowIndex[] rowGroupIndex)
+ throws JSONException {
+
+ OrcProto.RowIndex index;
+ if (rowGroupIndex == null || (col >= rowGroupIndex.length) ||
+ ((index = rowGroupIndex[col]) == null)) {
+ return;
+ }
+
+ writer.key("rowGroupIndexes").array();
+ for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
+ writer.object();
+ writer.key("entryId").value(entryIx);
+ OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
+ if (entry == null) {
+ continue;
+ }
+ OrcProto.ColumnStatistics colStats = entry.getStatistics();
+ writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
+ writer.key("positions").array();
+ for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
+ writer.value(entry.getPositions(posIx));
+ }
+ writer.endArray();
+ writer.endObject();
+ }
+ writer.endArray();
+ }
+
+ }
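
A minimal usage sketch for the new JsonFileDump above; it is not part of the commit and uses only the printJsonMetaData signature shown in the diff (the ORC path is hypothetical):

import java.util.Arrays;
import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.JsonFileDump;

public class JsonFileDumpSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Dump metadata for one ORC file as pretty-printed JSON, including writer
    // time zones and row-group/bloom-filter index details for column 1.
    JsonFileDump.printJsonMetaData(
        Arrays.asList("/tmp/example.orc"), // hypothetical file
        conf,
        Collections.singletonList(1),      // rowIndexCols
        true,                              // prettyPrint
        true);                             // printTimeZone
  }
}

The bin/ext/orcfiledump.sh change in the diffstat suggests this path is also reachable through the orcfiledump CLI wrapper.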
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/ql/src/test/results/clientpositive/tez/mrr.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/tez/mrr.q.out
index 8101f3b,d90b27f..1713783
--- a/ql/src/test/results/clientpositive/tez/mrr.q.out
+++ b/ql/src/test/results/clientpositive/tez/mrr.q.out
@@@ -469,19 -468,26 +467,25 @@@ STAGE PLANS
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: bigint), _col0 (type: string)
- sort order: ++
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col1)
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
- key expressions: _col1 (type: bigint)
- sort order: +
++ key expressions: _col1 (type: bigint), _col0 (type: string)
++ sort order: ++
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
Reducer 4
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@@ -501,734 -507,739 +505,738 @@@ POSTHOOK: query: SELECT s2.key, count(d
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-98 1
-97 1
-96 1
-95 1
-92 1
-90 1
-9 1
-87 1
-86 1
-85 1
-84 1
-83 1
-82 1
-80 1
-8 1
-78 1
-77 1
-76 1
-74 1
-72 1
-70 1
-69 1
-67 1
-66 1
-65 1
-64 1
-58 1
-57 1
-54 1
-53 1
-51 1
-5 1
-498 1
-497 1
-496 1
-495 1
-494 1
-493 1
-492 1
-491 1
-490 1
-489 1
-487 1
-485 1
-484 1
-483 1
-482 1
-481 1
-480 1
-479 1
-478 1
-477 1
-475 1
-472 1
-470 1
-47 1
-469 1
-468 1
-467 1
-466 1
-463 1
-462 1
-460 1
-459 1
-458 1
-457 1
-455 1
-454 1
-453 1
-452 1
-449 1
-448 1
-446 1
-444 1
-443 1
-44 1
-439 1
-438 1
-437 1
-436 1
-435 1
-432 1
-431 1
-430 1
-43 1
-429 1
-427 1
-424 1
-421 1
-42 1
-419 1
-418 1
-417 1
-414 1
-413 1
-411 1
-41 1
-409 1
-407 1
-406 1
-404 1
-403 1
-402 1
-401 1
-400 1
-4 1
-399 1
-397 1
-396 1
-395 1
-394 1
-393 1
-392 1
-389 1
-386 1
-384 1
-382 1
-379 1
-378 1
-377 1
-375 1
-374 1
-373 1
-37 1
-369 1
-368 1
-367 1
-366 1
-365 1
-364 1
-362 1
-360 1
-356 1
-353 1
-351 1
-35 1
-348 1
-345 1
-344 1
-342 1
-341 1
-34 1
-339 1
-338 1
-336 1
-335 1
-333 1
-332 1
-331 1
-33 1
-327 1
-325 1
-323 1
-322 1
-321 1
-318 1
-317 1
-316 1
-315 1
-311 1
-310 1
-309 1
-308 1
-307 1
-306 1
-305 1
-302 1
-30 1
-298 1
-296 1
-292 1
-291 1
-289 1
-288 1
-287 1
-286 1
-285 1
-284 1
-283 1
-282 1
-281 1
-280 1
-28 1
-278 1
-277 1
-275 1
-274 1
-273 1
-272 1
-27 1
-266 1
-265 1
-263 1
-262 1
-260 1
-26 1
-258 1
-257 1
-256 1
-255 1
-252 1
-249 1
-248 1
-247 1
-244 1
-242 1
-241 1
-24 1
-239 1
-238 1
-237 1
-235 1
-233 1
-230 1
-229 1
-228 1
-226 1
-224 1
-223 1
-222 1
-221 1
-219 1
-218 1
-217 1
-216 1
-214 1
-213 1
-209 1
-208 1
-207 1
-205 1
-203 1
-202 1
-201 1
-200 1
-20 1
-2 1
-199 1
-197 1
-196 1
-195 1
-194 1
-193 1
-192 1
-191 1
-190 1
-19 1
-189 1
-187 1
-186 1
-183 1
-181 1
-180 1
-18 1
-179 1
-178 1
-177 1
-176 1
-175 1
-174 1
-172 1
-170 1
-17 1
-169 1
-168 1
-167 1
-166 1
-165 1
-164 1
-163 1
-162 1
-160 1
-158 1
-157 1
-156 1
-155 1
-153 1
-152 1
-150 1
-15 1
-149 1
-146 1
-145 1
-143 1
-138 1
-137 1
-136 1
-134 1
-133 1
-131 1
-129 1
-128 1
-126 1
-125 1
-120 1
-12 1
-119 1
-118 1
-116 1
-114 1
-113 1
-111 1
-11 1
-105 1
-104 1
-103 1
-100 1
-10 1
0 1
-PREHOOK: query: -- same query with broadcast join
-EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-PREHOOK: type: QUERY
-POSTHOOK: query: -- same query with broadcast join
-EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 4 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- input vertices:
- 1 Map 4
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- keys: _col0 (type: string), _col1 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Map 4
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(_col1)
- keys: _col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: bigint)
- sort order: +
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string)
- Reducer 3
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-98 1
-97 1
-96 1
-95 1
-92 1
-90 1
-9 1
-87 1
-86 1
-85 1
-84 1
-83 1
-82 1
-80 1
-8 1
-78 1
-77 1
-76 1
-74 1
-72 1
-70 1
-69 1
-67 1
-66 1
-65 1
-64 1
-58 1
-57 1
-54 1
-53 1
-51 1
-5 1
-498 1
-497 1
-496 1
-495 1
-494 1
-493 1
-492 1
-491 1
-490 1
-489 1
-487 1
-485 1
-484 1
-483 1
-482 1
-481 1
-480 1
-479 1
-478 1
-477 1
-475 1
-472 1
-470 1
-47 1
-469 1
-468 1
-467 1
-466 1
-463 1
-462 1
-460 1
-459 1
-458 1
-457 1
-455 1
-454 1
-453 1
-452 1
-449 1
-448 1
-446 1
-444 1
-443 1
-44 1
-439 1
-438 1
-437 1
-436 1
-435 1
-432 1
-431 1
-430 1
-43 1
-429 1
-427 1
-424 1
-421 1
-42 1
-419 1
-418 1
-417 1
-414 1
-413 1
-411 1
-41 1
-409 1
-407 1
-406 1
-404 1
-403 1
-402 1
-401 1
-400 1
-4 1
-399 1
-397 1
-396 1
-395 1
-394 1
-393 1
-392 1
-389 1
-386 1
-384 1
-382 1
-379 1
-378 1
-377 1
-375 1
-374 1
-373 1
-37 1
-369 1
-368 1
-367 1
-366 1
-365 1
-364 1
-362 1
-360 1
-356 1
-353 1
-351 1
-35 1
-348 1
-345 1
-344 1
-342 1
-341 1
-34 1
-339 1
-338 1
-336 1
-335 1
-333 1
-332 1
-331 1
-33 1
-327 1
-325 1
-323 1
-322 1
-321 1
-318 1
-317 1
-316 1
-315 1
-311 1
-310 1
-309 1
-308 1
-307 1
-306 1
-305 1
-302 1
-30 1
-298 1
-296 1
-292 1
-291 1
-289 1
-288 1
-287 1
-286 1
-285 1
-284 1
-283 1
-282 1
-281 1
-280 1
-28 1
-278 1
-277 1
-275 1
-274 1
-273 1
-272 1
-27 1
-266 1
-265 1
-263 1
-262 1
-260 1
-26 1
-258 1
-257 1
-256 1
-255 1
-252 1
-249 1
-248 1
-247 1
-244 1
-242 1
-241 1
-24 1
-239 1
-238 1
-237 1
-235 1
-233 1
-230 1
-229 1
-228 1
-226 1
-224 1
-223 1
-222 1
-221 1
-219 1
-218 1
-217 1
-216 1
-214 1
-213 1
-209 1
-208 1
-207 1
-205 1
-203 1
-202 1
-201 1
-200 1
-20 1
-2 1
-199 1
-197 1
-196 1
-195 1
-194 1
-193 1
-192 1
-191 1
-190 1
-19 1
-189 1
-187 1
-186 1
-183 1
-181 1
-180 1
-18 1
-179 1
-178 1
-177 1
-176 1
-175 1
-174 1
-172 1
-170 1
-17 1
-169 1
-168 1
-167 1
-166 1
-165 1
-164 1
-163 1
-162 1
-160 1
-158 1
-157 1
-156 1
-155 1
-153 1
-152 1
-150 1
-15 1
-149 1
-146 1
-145 1
-143 1
-138 1
-137 1
-136 1
-134 1
-133 1
-131 1
-129 1
-128 1
-126 1
-125 1
-120 1
-12 1
-119 1
-118 1
-116 1
-114 1
-113 1
-111 1
-11 1
-105 1
-104 1
-103 1
-100 1
10 1
-0 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1
+PREHOOK: query: -- same query with broadcast join
+EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- same query with broadcast join
+EXPLAIN SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
- aggregations: count(DISTINCT _col1)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
- outputColumnNames: _col0, _col1, _col2
++ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
- aggregations: count(DISTINCT KEY._col1:0._col0)
- keys: KEY._col0 (type: string)
++ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: bigint), _col0 (type: string)
- sort order: ++
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
++ Group By Operator
++ aggregations: count(_col1)
++ keys: _col0 (type: string)
++ mode: complete
++ outputColumnNames: _col0, _col1
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
++ Reduce Output Operator
++ key expressions: _col1 (type: bigint), _col0 (type: string)
++ sort order: ++
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
- Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE
++ Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt,s2.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1
+10 1
+100 1
+103 1
+104 1
+105 1
+11 1
+111 1
+113 1
+114 1
+116 1
+118 1
+119 1
+12 1
+120 1
+125 1
+126 1
+128 1
+129 1
+131 1
+133 1
+134 1
+136 1
+137 1
+138 1
+143 1
+145 1
+146 1
+149 1
+15 1
+150 1
+152 1
+153 1
+155 1
+156 1
+157 1
+158 1
+160 1
+162 1
+163 1
+164 1
+165 1
+166 1
+167 1
+168 1
+169 1
+17 1
+170 1
+172 1
+174 1
+175 1
+176 1
+177 1
+178 1
+179 1
+18 1
+180 1
+181 1
+183 1
+186 1
+187 1
+189 1
+19 1
+190 1
+191 1
+192 1
+193 1
+194 1
+195 1
+196 1
+197 1
+199 1
+2 1
+20 1
+200 1
+201 1
+202 1
+203 1
+205 1
+207 1
+208 1
+209 1
+213 1
+214 1
+216 1
+217 1
+218 1
+219 1
+221 1
+222 1
+223 1
+224 1
+226 1
+228 1
+229 1
+230 1
+233 1
+235 1
+237 1
+238 1
+239 1
+24 1
+241 1
+242 1
+244 1
+247 1
+248 1
+249 1
+252 1
+255 1
+256 1
+257 1
+258 1
+26 1
+260 1
+262 1
+263 1
+265 1
+266 1
+27 1
+272 1
+273 1
+274 1
+275 1
+277 1
+278 1
+28 1
+280 1
+281 1
+282 1
+283 1
+284 1
+285 1
+286 1
+287 1
+288 1
+289 1
+291 1
+292 1
+296 1
+298 1
+30 1
+302 1
+305 1
+306 1
+307 1
+308 1
+309 1
+310 1
+311 1
+315 1
+316 1
+317 1
+318 1
+321 1
+322 1
+323 1
+325 1
+327 1
+33 1
+331 1
+332 1
+333 1
+335 1
+336 1
+338 1
+339 1
+34 1
+341 1
+342 1
+344 1
+345 1
+348 1
+35 1
+351 1
+353 1
+356 1
+360 1
+362 1
+364 1
+365 1
+366 1
+367 1
+368 1
+369 1
+37 1
+373 1
+374 1
+375 1
+377 1
+378 1
+379 1
+382 1
+384 1
+386 1
+389 1
+392 1
+393 1
+394 1
+395 1
+396 1
+397 1
+399 1
+4 1
+400 1
+401 1
+402 1
+403 1
+404 1
+406 1
+407 1
+409 1
+41 1
+411 1
+413 1
+414 1
+417 1
+418 1
+419 1
+42 1
+421 1
+424 1
+427 1
+429 1
+43 1
+430 1
+431 1
+432 1
+435 1
+436 1
+437 1
+438 1
+439 1
+44 1
+443 1
+444 1
+446 1
+448 1
+449 1
+452 1
+453 1
+454 1
+455 1
+457 1
+458 1
+459 1
+460 1
+462 1
+463 1
+466 1
+467 1
+468 1
+469 1
+47 1
+470 1
+472 1
+475 1
+477 1
+478 1
+479 1
+480 1
+481 1
+482 1
+483 1
+484 1
+485 1
+487 1
+489 1
+490 1
+491 1
+492 1
+493 1
+494 1
+495 1
+496 1
+497 1
+498 1
+5 1
+51 1
+53 1
+54 1
+57 1
+58 1
+64 1
+65 1
+66 1
+67 1
+69 1
+70 1
+72 1
+74 1
+76 1
+77 1
+78 1
+8 1
+80 1
+82 1
+83 1
+84 1
+85 1
+86 1
+87 1
+9 1
+90 1
+92 1
+95 1
+96 1
+97 1
+98 1
PREHOOK: query: -- query with multiple branches in the task dag
EXPLAIN
SELECT *
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e6b1556e/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShims.java
----------------------------------------------------------------------