Posted to commits@hive.apache.org by gu...@apache.org on 2013/08/08 09:53:16 UTC

svn commit: r1511610 - in /hive/branches/tez: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientpositive/ ql/src/test/results/clien...

Author: gunther
Date: Thu Aug  8 07:53:15 2013
New Revision: 1511610

URL: http://svn.apache.org/r1511610
Log:
HIVE-5008: Reuse MapRedUtils to generate Map/ReduceWork (Gunther Hagleitner)

Added:
    hive/branches/tez/ql/src/test/queries/clientpositive/mrr.q
    hive/branches/tez/ql/src/test/results/clientpositive/mrr.q.out
Modified:
    hive/branches/tez/build.properties
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java

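What this commit does, in brief: the MapWork-initialization logic that
previously lived inside GenMapRedUtils.setTaskPlan is extracted into a new
public helper, GenMapRedUtils.setMapWork(...), and the Tez planner
(GenTezWork) now reuses it instead of hand-wiring the alias-to-operator map.
A minimal before/after sketch, abbreviated from the hunks below:

    // Before (GenTezWork): the map-side plan was wired up by hand.
    mapWork.getAliasToWork().put("", root);

    // After (GenTezWork): reuse the extracted helper. Per its javadoc, a
    // null partition list is computed on the fly; 'false' means this is
    // map-reduce work rather than local work.
    GenMapRedUtils.setMapWork(mapWork, context.parseContext,
        context.inputs, null, root, alias, context.conf, false);
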
Modified: hive/branches/tez/build.properties
URL: http://svn.apache.org/viewvc/hive/branches/tez/build.properties?rev=1511610&r1=1511609&r2=1511610&view=diff
==============================================================================
--- hive/branches/tez/build.properties (original)
+++ hive/branches/tez/build.properties Thu Aug  8 07:53:15 2013
@@ -29,7 +29,7 @@ javac.args.warnings=
 
 hadoop-0.20.version=0.20.2
 hadoop-0.20S.version=1.1.2
-hadoop-0.23.version=2.0.5-alpha
+hadoop-0.23.version=2.1.0-beta
 hadoop.version=${hadoop-0.20.version}
 hadoop.security.version=${hadoop-0.20S.version}
 # Used to determine which set of Hadoop artifacts we depend on.

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java?rev=1511610&r1=1511609&r2=1511610&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java Thu Aug  8 07:53:15 2013
@@ -238,9 +238,9 @@ public class DagUtils {
   /*
    * Helper function to create Vertex for given ReduceWork.
    */
-  private static Vertex creatVertex(JobConf conf, ReduceWork reduceWork, int seqNo,
+  private static Vertex createVertex(JobConf conf, ReduceWork reduceWork, int seqNo,
       LocalResource appJarLr, List<LocalResource> additionalLr, FileSystem fs,
-      Path mrScratchDir, Context ctx) throws IOException {
+      Path mrScratchDir, Context ctx) throws Exception {
 
     // write out the operator plan
     Path planPath = Utilities.setReduceWork(conf, reduceWork,
@@ -404,16 +404,16 @@ public class DagUtils {
    */
   public static Vertex createVertex(JobConf conf, BaseWork work,
       Path scratchDir, int seqNo, LocalResource appJarLr, List<LocalResource> additionalLr,
-      FileSystem fileSystem, Context ctx) {
+      FileSystem fileSystem, Context ctx) throws Exception {
 
     // simply dispatch the call to the right method for the actual (sub-) type of
     // BaseWork.
     if (work instanceof MapWork) {
-      return createVertex(conf, (MapWork) work, scratchDir, seqNo, appJarLr,
-          additionalLr, fileSystem, ctx);
+      return createVertex(conf, (MapWork) work, seqNo, appJarLr,
+          additionalLr, fileSystem, scratchDir, ctx);
     } else if (work instanceof ReduceWork) {
-      return createVertex(conf, (ReduceWork) work, scratchDir, seqNo, appJarLr,
-          additionalLr, fileSystem, ctx);
+      return createVertex(conf, (ReduceWork) work, seqNo, appJarLr,
+          additionalLr, fileSystem, scratchDir, ctx);
     } else {
       assert false;
       return null;

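Aside from widening the throws clause (vertex construction can now surface
checked exceptions beyond IOException), this hunk fixes the creatVertex typo
and moves scratchDir at the call sites to match the private helpers'
parameter order. The dispatch itself is plain instanceof-based overload
selection; a self-contained sketch of the pattern (Work, MapWork and
ReduceWork here are stand-ins, not the real Hive classes):

    public class DispatchSketch {
      static abstract class Work { }
      static class MapWork extends Work { }
      static class ReduceWork extends Work { }

      // Dispatch to the overload for the concrete (sub-)type of Work.
      static String createVertex(Work work) {
        if (work instanceof MapWork) {
          return createVertex((MapWork) work);
        } else if (work instanceof ReduceWork) {
          return createVertex((ReduceWork) work);
        }
        throw new AssertionError("unexpected work type: " + work);
      }

      static String createVertex(MapWork work) { return "map vertex"; }
      static String createVertex(ReduceWork work) { return "reduce vertex"; }

      public static void main(String[] args) {
        System.out.println(createVertex(new MapWork()));     // map vertex
        System.out.println(createVertex(new ReduceWork()));  // reduce vertex
      }
    }
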
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java?rev=1511610&r1=1511609&r2=1511610&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java Thu Aug  8 07:53:15 2013
@@ -102,7 +102,7 @@ public class TezTask extends Task<TezWor
 
   private DAG build(JobConf conf, TezWork work, Path scratchDir,
       LocalResource appJarLr, Context ctx)
-      throws IOException {
+      throws Exception {
 
     Map<BaseWork, Vertex> workToVertex = new HashMap<BaseWork, Vertex>();
     Map<BaseWork, JobConf> workToConf = new HashMap<BaseWork, JobConf>();

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1511610&r1=1511609&r2=1511610&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Thu Aug  8 07:53:15 2013
@@ -85,7 +85,7 @@ public final class GenMapRedUtils {
     LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils");
   }
 
-  private static boolean needsTagging(ReduceWork rWork) {
+  public static boolean needsTagging(ReduceWork rWork) {
     return rWork != null && (rWork.getReducer().getClass() == JoinOperator.class ||
          rWork.getReducer().getClass() == DemuxOperator.class);
   }
@@ -439,18 +439,38 @@ public final class GenMapRedUtils {
   public static void setTaskPlan(String alias_id,
       Operator<? extends OperatorDesc> topOp, Task<?> task, boolean local,
       GenMRProcContext opProcCtx, PrunedPartitionList pList) throws SemanticException {
-    MapWork plan = ((MapredWork) task.getWork()).getMapWork();
-    ParseContext parseCtx = opProcCtx.getParseCtx();
-    Set<ReadEntity> inputs = opProcCtx.getInputs();
+    setMapWork(((MapredWork) task.getWork()).getMapWork(), opProcCtx.getParseCtx(),
+        opProcCtx.getInputs(), pList, topOp, alias_id, opProcCtx.getConf(), local);
+    opProcCtx.addSeenOp(task, topOp);
+  }
 
+  /**
+   * initialize MapWork
+   *
+   * @param alias_id
+   *          current alias
+   * @param topOp
+   *          the top operator of the stack
+   * @param plan
+   *          map work to initialize
+   * @param local
+   *          whether you need to add to map-reduce or local work
+   * @param pList
+   *          pruned partition list. If it is null it will be computed on-the-fly.
+   * @param inputs
+   *          read entities for the map work
+   * @param conf
+   *          current instance of hive conf
+   */
+  public static void setMapWork(MapWork plan, ParseContext parseCtx, Set<ReadEntity> inputs,
+      PrunedPartitionList partsList, Operator<? extends OperatorDesc> topOp, String alias_id,
+      HiveConf conf, boolean local) throws SemanticException {
     ArrayList<Path> partDir = new ArrayList<Path>();
     ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
 
     Path tblDir = null;
     TableDesc tblDesc = null;
 
-    PrunedPartitionList partsList = pList;
-
     plan.setNameToSplitSample(parseCtx.getNameToSplitSample());
 
     if (partsList == null) {
@@ -458,7 +478,7 @@ public final class GenMapRedUtils {
         partsList = parseCtx.getOpToPartList().get((TableScanOperator) topOp);
         if (partsList == null) {
           partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
-              parseCtx.getOpToPartPruner().get(topOp), opProcCtx.getConf(),
+              parseCtx.getOpToPartPruner().get(topOp), conf,
               alias_id, parseCtx.getPrunedPartitions());
           parseCtx.getOpToPartList().put((TableScanOperator) topOp, partsList);
         }
@@ -704,7 +724,6 @@ public final class GenMapRedUtils {
       }
       plan.setMapLocalWork(localPlan);
     }
-    opProcCtx.addSeenOp(task, topOp);
   }
 
   /**

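The net effect in this file is an extract-method refactoring: setTaskPlan
keeps its signature but delegates to the new public setMapWork, which the
Tez code path can now call without a Task or GenMRProcContext in hand. The
shape of the change, condensed from the hunk above (generic parameters
elided):

    public static void setTaskPlan(String alias_id, Operator<?> topOp,
        Task<?> task, boolean local, GenMRProcContext opProcCtx,
        PrunedPartitionList pList) throws SemanticException {
      // All MapWork initialization now lives in the shared helper.
      setMapWork(((MapredWork) task.getWork()).getMapWork(),
          opProcCtx.getParseCtx(), opProcCtx.getInputs(), pList, topOp,
          alias_id, opProcCtx.getConf(), local);
      // MapReduce-only bookkeeping stays in the wrapper; note it moved
      // here from the end of the old method body.
      opProcCtx.addSeenOp(task, topOp);
    }

needsTagging also goes from private to public so that GenTezWork can ask it
whether a reducer (join/demux) needs its inputs tagged.
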
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java?rev=1511610&r1=1511609&r2=1511610&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java Thu Aug  8 07:53:15 2013
@@ -24,9 +24,12 @@ import java.util.Stack;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
@@ -82,7 +85,13 @@ public class GenTezWork implements NodeP
       assert root.getParentOperators().isEmpty();
       LOG.debug("Adding map work for " + root);
       MapWork mapWork = new MapWork();
-      mapWork.getAliasToWork().put("", root);
+
+      // map work starts with table scan operators
+      assert root instanceof TableScanOperator;
+      String alias = ((TableScanOperator)root).getConf().getAlias();
+
+      GenMapRedUtils.setMapWork(mapWork, context.parseContext,
+          context.inputs, null, root, alias, context.conf, false);
       tezWork.add(mapWork);
       work = mapWork;
     } else {
@@ -90,6 +99,16 @@ public class GenTezWork implements NodeP
       LOG.debug("Adding reduce work for " + root);
       ReduceWork reduceWork = new ReduceWork();
       reduceWork.setReducer(root);
+      reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
+
+      // All parents should be reduce sinks. We pick the first to choose the
+      // number of reducers. In the join/union case they will all be -1. In
+      // sort/order case where it matters there will be only one parent.
+      assert root.getParentOperators().get(0) instanceof ReduceSinkOperator;
+      ReduceSinkOperator reduceSink
+        = (ReduceSinkOperator)root.getParentOperators().get(0);
+      reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
+
       tezWork.add(reduceWork);
       tezWork.connect(
           context.preceedingWork,

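Two planner details are worth noting in these hunks: a MapWork now asserts
its root is a TableScanOperator and is initialized through the shared
setMapWork, and a ReduceWork copies its reducer count from its first parent
ReduceSinkOperator. Per the comment above, that count only matters in the
sort/order case (a single parent, where ORDER BY needs one reducer); for
join/union all parents carry -1. The generated work items are then strung
into a DAG with tezWork.add(...) and tezWork.connect(...). A toy,
self-contained sketch of that bookkeeping (WorkGraph is a stand-in, not
Hive's TezWork API), using the map-reduce-reduce chain the new mrr.q test
below produces:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class WorkGraph {
      private final List<String> nodes = new ArrayList<String>();
      private final Map<String, List<String>> edges =
          new HashMap<String, List<String>>();

      void add(String work) { nodes.add(work); }

      // Connect an upstream work item to the one consuming its output.
      void connect(String from, String to) {
        List<String> out = edges.get(from);
        if (out == null) {
          out = new ArrayList<String>();
          edges.put(from, out);
        }
        out.add(to);
      }

      public static void main(String[] args) {
        WorkGraph dag = new WorkGraph();
        dag.add("Map 1");
        dag.add("Reducer 2");                  // group-by
        dag.add("Reducer 3");                  // order-by
        dag.connect("Map 1", "Reducer 2");
        dag.connect("Reducer 2", "Reducer 3"); // the "MRR" chain
        System.out.println(dag.edges);
      }
    }

This is why the q.out below shows a single Tez Stage-1 with one map
operator tree followed by two Reduce Operator Tree sections, where classic
MapReduce would have needed two chained jobs.
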
Added: hive/branches/tez/ql/src/test/queries/clientpositive/mrr.q
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/queries/clientpositive/mrr.q?rev=1511610&view=auto
==============================================================================
--- hive/branches/tez/ql/src/test/queries/clientpositive/mrr.q (added)
+++ hive/branches/tez/ql/src/test/queries/clientpositive/mrr.q Thu Aug  8 07:53:15 2013
@@ -0,0 +1,6 @@
+set hive.auto.convert.join=true;
+set hive.optimize.tez=true;
+EXPLAIN EXTENDED SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt;
+EXPLAIN EXTENDED SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt;
+
+SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt;
\ No newline at end of file

Added: hive/branches/tez/ql/src/test/results/clientpositive/mrr.q.out
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/results/clientpositive/mrr.q.out?rev=1511610&view=auto
==============================================================================
--- hive/branches/tez/ql/src/test/results/clientpositive/mrr.q.out (added)
+++ hive/branches/tez/ql/src/test/results/clientpositive/mrr.q.out Thu Aug  8 07:53:15 2013
@@ -0,0 +1,386 @@
+PREHOOK: query: EXPLAIN EXTENDED SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL value)) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                aggregations:
+                      expr: count(value)
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numPartitions 0
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numPartitions 0
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [src]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions:
+                    expr: _col1
+                    type: bigint
+              sort order: +
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types string:bigint
+                  escape.delim \
+                  hive.serialization.extend.nesting.levels true
+                  serialization.format 1
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: EXPLAIN EXTENDED SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT s2.key, count(distinct s2.value) as cnt FROM src s1 join src s2 on (s1.key = s2.key) GROUP BY s2.key ORDER BY cnt
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) s1) (TOK_TABREF (TOK_TABNAME src) s2) (= (. (TOK_TABLE_OR_COL s1) key) (. (TOK_TABLE_OR_COL s2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL s2) value)) cnt)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s2) key)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL cnt)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Alias -> Map Operator Tree:
+        s2 
+          TableScan
+            alias: s2
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numPartitions 0
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numPartitions 0
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [s2]
+      Alias -> Map Operator Tree:
+        s1 
+          TableScan
+            alias: s1
+            GatherStats: false
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numPartitions 0
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numPartitions 0
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [s1]
+      Needs Tagging: true
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col4, _col5
+          Select Operator
+            expressions:
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col4, _col5
+            Group By Operator
+              aggregations:
+                    expr: count(DISTINCT _col5)
+              bucketGroup: false
+              keys:
+                    expr: _col4
+                    type: string
+                    expr: _col5
+                    type: string
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                sort order: ++
+                Map-reduce partition columns:
+                      expr: _col0
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: _col2
+                      type: bigint
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            Reduce Output Operator
+              key expressions:
+                    expr: _col1
+                    type: bigint
+              sort order: +
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: bigint
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types string:bigint
+                  escape.delim \
+                  hive.serialization.extend.nesting.levels true
+                  serialization.format 1
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+