Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/01 00:28:12 UTC

svn commit: r1628570 - in /hive/branches/spark: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ ql/src/test/results/clientpositive/tez/

Author: xuefu
Date: Tue Sep 30 22:28:12 2014
New Revision: 1628570

URL: http://svn.apache.org/r1628570
Log:
HIVE-8180: Update SparkReduceRecordHandler for processing the vectors [spark branch] (Chinna via Xuefu)

Modified:
    hive/branches/spark/itests/src/test/resources/testconfiguration.properties
    hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
    hive/branches/spark/ql/src/test/queries/clientpositive/vector_cast_constant.q
    hive/branches/spark/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out
    hive/branches/spark/ql/src/test/results/clientpositive/vector_cast_constant.q.out

Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1628570&r1=1628569&r2=1628570&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Tue Sep 30 22:28:12 2014
@@ -551,4 +551,7 @@ spark.query.files=add_part_multiple.q \
   load_dyn_part14.q \
   load_dyn_part15.q \
   vectorization_13.q \
-  vectorized_shufflejoin.q
+  vectorized_shufflejoin.q \
+  vector_cast_constant.q \
+  vectorization_9.q \
+  vectorization_12.q

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java?rev=1628570&r1=1628569&r2=1628570&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java Tue Sep 30 22:28:12 2014
@@ -31,6 +31,10 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
@@ -40,7 +44,10 @@ import org.apache.hadoop.hive.serde2.Ser
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
@@ -78,11 +85,22 @@ public class SparkReduceRecordHandler ex
   private TableDesc keyTableDesc;
   private TableDesc[] valueTableDesc;
   private ObjectInspector[] rowObjectInspector;
+  private boolean vectorized = false;
 
   // runtime objects
   private transient Object keyObject;
   private transient BytesWritable groupKey;
 
+  private DataOutputBuffer buffer;
+  private VectorizedRowBatch[] batches;
+  // number of columns pertaining to keys in a vectorized row batch
+  private int keysColumnOffset;
+  private final int BATCH_SIZE = VectorizedRowBatch.DEFAULT_SIZE;
+  private StructObjectInspector keyStructInspector;
+  private StructObjectInspector[] valueStructInspectors;
+  /* this is only used in the error code path */
+  private List<VectorExpressionWriter>[] valueStringWriters;
+
   public void init(JobConf job, OutputCollector output, Reporter reporter) {
     super.init(job, output, reporter);
 
@@ -100,6 +118,7 @@ public class SparkReduceRecordHandler ex
     }
 
     reducer = gWork.getReducer();
+    vectorized = gWork.getVectorMode();
     reducer.setParentOperators(null); // clear out any parents as reducer is the
     // root
     isTagged = gWork.getNeedsTagging();
@@ -110,22 +129,67 @@ public class SparkReduceRecordHandler ex
       SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
       keyObjectInspector = inputKeyDeserializer.getObjectInspector();
       valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
+
+      if (vectorized) {
+	final int maxTags = gWork.getTagToValueDesc().size();
+	keyStructInspector = (StructObjectInspector) keyObjectInspector;
+	batches = new VectorizedRowBatch[maxTags];
+	valueStructInspectors = new StructObjectInspector[maxTags];
+	valueStringWriters = (List<VectorExpressionWriter>[]) new List[maxTags];
+	keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
+	buffer = new DataOutputBuffer();
+      }
+
       for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
-        // We should initialize the SerDe with the TypeInfo when available.
-        valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
-        inputValueDeserializer[tag] = ReflectionUtils.newInstance(
-          valueTableDesc[tag].getDeserializerClass(), null);
-        SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null,
-          valueTableDesc[tag].getProperties(), null);
-        valueObjectInspector[tag] = inputValueDeserializer[tag]
-          .getObjectInspector();
-
-        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-        ois.add(keyObjectInspector);
-        ois.add(valueObjectInspector[tag]);
-        reducer.setGroupKeyObjectInspector(keyObjectInspector);
-        rowObjectInspector[tag] = ObjectInspectorFactory
-          .getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
+	// We should initialize the SerDe with the TypeInfo when available.
+	valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
+	inputValueDeserializer[tag] = ReflectionUtils.newInstance(
+	    valueTableDesc[tag].getDeserializerClass(), null);
+	SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null,
+	    valueTableDesc[tag].getProperties(), null);
+	valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
+
+	ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+
+	if (vectorized) {
+	  /* vectorization only works with struct object inspectors */
+	  valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];
+
+	  batches[tag] = VectorizedBatchUtil.constructVectorizedRowBatch(keyStructInspector,
+	      valueStructInspectors[tag]);
+	  final int totalColumns = keysColumnOffset
+	      + valueStructInspectors[tag].getAllStructFieldRefs().size();
+	  valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
+	  valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory
+	      .genVectorStructExpressionWritables(keyStructInspector)));
+	  valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory
+	      .genVectorStructExpressionWritables(valueStructInspectors[tag])));
+
+	  /*
+	   * The row object inspector used by ReduceWork needs to be a
+	   * **standard** struct object inspector, not just any struct object
+	   * inspector.
+	   */
+	  ArrayList<String> colNames = new ArrayList<String>();
+	  List<? extends StructField> fields = keyStructInspector.getAllStructFieldRefs();
+	  for (StructField field : fields) {
+	    colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
+	    ois.add(field.getFieldObjectInspector());
+	  }
+	  fields = valueStructInspectors[tag].getAllStructFieldRefs();
+	  for (StructField field : fields) {
+	    colNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
+	    ois.add(field.getFieldObjectInspector());
+	  }
+	  rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(
+	      colNames, ois);
+	} else {
+	  ois.add(keyObjectInspector);
+	  ois.add(valueObjectInspector[tag]);
+	  reducer.setGroupKeyObjectInspector(keyObjectInspector);
+	  rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(
+	      Utilities.reduceFieldNameList, ois);
+	}
       }
     } catch (Exception e) {
       throw new RuntimeException(e);
@@ -197,7 +261,32 @@ public class SparkReduceRecordHandler ex
         reducer.setGroupKeyObject(keyObject);
         reducer.startGroup();
       }
-      // System.err.print(keyObject.toString());
+      /* this.keyObject passed via reference */
+      if (vectorized) {
+	processVectors(values, tag);
+      } else {
+	processKeyValues(values, tag);
+      }
+
+    } catch (Throwable e) {
+      abort = true;
+      if (e instanceof OutOfMemoryError) {
+	// Don't create a new object if we are already out of memory
+	throw (OutOfMemoryError) e;
+      } else {
+	LOG.fatal(StringUtils.stringifyException(e));
+	throw new RuntimeException(e);
+      }
+    }
+  }
+
+  /**
+   * @param values
+   * @return true if it is not done and can take more inputs
+   */
+  private boolean processKeyValues(Iterator values, byte tag) throws HiveException {
+
+	// System.err.print(keyObject.toString());
       while (values.hasNext()) {
         BytesWritable valueWritable = (BytesWritable) values.next();
         // System.err.print(who.getHo().toString());
@@ -233,15 +322,73 @@ public class SparkReduceRecordHandler ex
         }
       }
 
-    } catch (Throwable e) {
-      abort = true;
-      if (e instanceof OutOfMemoryError) {
-        // Don't create a new object if we are already out of memory
-        throw (OutOfMemoryError) e;
-      } else {
-        LOG.fatal(StringUtils.stringifyException(e));
-        throw new RuntimeException(e);
+    return true; // give me more
+  }
+
+  /**
+   * @param values
+   * @return true if it is not done and can take more inputs
+   */
+  private boolean processVectors(Iterator values, byte tag) throws HiveException {
+    VectorizedRowBatch batch = batches[tag];
+    batch.reset();
+
+    /* deserialize key into columns */
+    VectorizedBatchUtil.addRowToBatchFrom(keyObject, keyStructInspector, 0, 0, batch, buffer);
+    for (int i = 0; i < keysColumnOffset; i++) {
+      VectorizedBatchUtil.setRepeatingColumn(batch, i);
+    }
+
+    int rowIdx = 0;
+    try {
+      while (values.hasNext()) {
+	/* deserialize value into columns */
+	BytesWritable valueWritable = (BytesWritable) values.next();
+	Object valueObj = deserializeValue(valueWritable, tag);
+
+	VectorizedBatchUtil.addRowToBatchFrom(valueObj, valueStructInspectors[tag], rowIdx,
+	    keysColumnOffset, batch, buffer);
+	rowIdx++;
+	if (rowIdx >= BATCH_SIZE) {
+	  VectorizedBatchUtil.setBatchSize(batch, rowIdx);
+	  reducer.processOp(batch, tag);
+	  rowIdx = 0;
+	  if (isLogInfoEnabled) {
+	    logMemoryInfo();
+	  }
+	}
       }
+      if (rowIdx > 0) {
+	VectorizedBatchUtil.setBatchSize(batch, rowIdx);
+	reducer.processOp(batch, tag);
+      }
+      if (isLogInfoEnabled) {
+	logMemoryInfo();
+      }
+    } catch (Exception e) {
+      String rowString = null;
+      try {
+	/* batch.toString depends on this */
+	batch.setValueWriters(valueStringWriters[tag].toArray(new VectorExpressionWriter[0]));
+	rowString = batch.toString();
+      } catch (Exception e2) {
+	rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2)
+	    + " ]";
+      }
+      throw new HiveException("Hive Runtime Error while processing vector batch (tag=" + tag + ") "
+	  + rowString, e);
+    }
+    return true; // give me more
+  }
+
+  private Object deserializeValue(BytesWritable valueWritable, byte tag) throws HiveException {
+    try {
+      return inputValueDeserializer[tag].deserialize(valueWritable);
+    } catch (SerDeException e) {
+      throw new HiveException("Hive Runtime Error: Unable to deserialize reduce input value (tag="
+	  + tag + ") from "
+	  + Utilities.formatBinaryString(valueWritable.getBytes(), 0, valueWritable.getLength())
+	  + " with properties " + valueTableDesc[tag].getProperties(), e);
     }
   }
 

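For readers skimming the diff above, the core of the change is the accumulate-and-flush loop in processVectors(): the key columns are marked as repeating, each value row is copied into a VectorizedRowBatch, and the batch is pushed through reducer.processOp() whenever it reaches DEFAULT_SIZE, with one final flush for any remainder. The snippet below is a minimal, self-contained sketch of that control flow only; the names ReduceBatchingSketch, BatchConsumer, and drainValues are illustrative stand-ins rather than Hive APIs, and the real code fills VectorizedRowBatch columns instead of Object[] rows.

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;

    public class ReduceBatchingSketch {

      // Mirrors VectorizedRowBatch.DEFAULT_SIZE used as BATCH_SIZE in the patch.
      static final int BATCH_SIZE = 1024;

      // Stand-in for the reduce-side operator that consumes a full batch.
      interface BatchConsumer {
        void processBatch(List<Object[]> batch, byte tag);
      }

      // Accumulate the values for one reduce key into a batch, flush whenever the
      // batch fills up, and flush the partial batch left over at the end -- the
      // same shape as processVectors() in SparkReduceRecordHandler.
      static void drainValues(Iterator<Object[]> values, byte tag, BatchConsumer consumer) {
        List<Object[]> batch = new ArrayList<Object[]>(BATCH_SIZE);
        while (values.hasNext()) {
          batch.add(values.next());            // "deserialize value into columns"
          if (batch.size() >= BATCH_SIZE) {    // batch full: hand it to the reducer
            consumer.processBatch(batch, tag);
            batch = new ArrayList<Object[]>(BATCH_SIZE);
          }
        }
        if (!batch.isEmpty()) {                // remainder after the loop
          consumer.processBatch(batch, tag);
        }
      }
    }
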
Modified: hive/branches/spark/ql/src/test/queries/clientpositive/vector_cast_constant.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/vector_cast_constant.q?rev=1628570&r1=1628569&r2=1628570&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/vector_cast_constant.q (original)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/vector_cast_constant.q Tue Sep 30 22:28:12 2014
@@ -40,11 +40,11 @@ EXPLAIN SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10;
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10;
 
 SELECT 
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10;
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10;

Modified: hive/branches/spark/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out?rev=1628570&r1=1628569&r2=1628570&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/tez/vector_cast_constant.q.out Tue Sep 30 22:28:12 2014
@@ -102,14 +102,14 @@ PREHOOK: query: EXPLAIN SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN SELECT 
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -120,6 +120,7 @@ STAGE PLANS:
     Tez
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -156,16 +157,28 @@ STAGE PLANS:
                   expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
-                  Limit
-                    Number of rows: 10
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+        Reducer 3 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
@@ -178,7 +191,7 @@ PREHOOK: query: SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1korc
 #### A masked pattern was here ####
@@ -187,7 +200,7 @@ POSTHOOK: query: SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
 #### A masked pattern was here ####

Modified: hive/branches/spark/ql/src/test/results/clientpositive/vector_cast_constant.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/vector_cast_constant.q.out?rev=1628570&r1=1628569&r2=1628570&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/vector_cast_constant.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/vector_cast_constant.q.out Tue Sep 30 22:28:12 2014
@@ -102,18 +102,19 @@ PREHOOK: query: EXPLAIN SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN SELECT 
   i,
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -150,16 +151,37 @@ STAGE PLANS:
             expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
             outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
-            Limit
-              Number of rows: 10
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
               Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -172,7 +194,7 @@ PREHOOK: query: SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1korc
 #### A masked pattern was here ####
@@ -181,7 +203,7 @@ POSTHOOK: query: SELECT 
   AVG(CAST(50 AS INT)) AS `avg_int_ok`,
   AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
   AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
-  FROM over1korc GROUP BY i LIMIT 10
+  FROM over1korc GROUP BY i ORDER BY i LIMIT 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
 #### A masked pattern was here ####