You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2014/08/27 19:00:58 UTC
svn commit: r1620938 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/exec/vector/ test/results/clientpositive/tez/
Author: prasanthj
Date: Wed Aug 27 17:00:57 2014
New Revision: 1620938
URL: http://svn.apache.org/r1620938
Log:
HIVE-7887: VectorFileSinkOp does not publish the stats correctly (Prasanth J, reviewed by Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1620938&r1=1620937&r2=1620938&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Wed Aug 27 17:00:57 2014
@@ -92,8 +92,8 @@ public class FileSinkOperator extends Te
protected transient ListBucketingCtx lbCtx;
protected transient boolean isSkewedStoredAsSubDirectories;
protected transient boolean statsCollectRawDataSize;
- private transient boolean[] statsFromRecordWriter;
- private transient boolean isCollectRWStats;
+ protected transient boolean[] statsFromRecordWriter;
+ protected transient boolean isCollectRWStats;
private transient FSPaths prevFsp;
private transient FSPaths fpaths;
private transient ObjectInspector keyOI;
@@ -626,7 +626,7 @@ public class FileSinkOperator extends Te
}
}
- private boolean areAllTrue(boolean[] statsFromRW) {
+ protected boolean areAllTrue(boolean[] statsFromRW) {
for(boolean b : statsFromRW) {
if (!b) {
return false;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java?rev=1620938&r1=1620937&r2=1620938&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFileSinkOperator.java Wed Aug 27 17:00:57 2014
@@ -145,7 +145,11 @@ public class VectorFileSinkOperator exte
}
rowOutWriters = fpaths.getOutWriters();
- if (conf.isGatherStats()) {
+ // Check if all record writers implement statistics. If at least one RW
+ // doesn't implement the stats interface, we will fall back to the conventional
+ // way of gathering stats.
+ isCollectRWStats = areAllTrue(statsFromRecordWriter);
+ if (conf.isGatherStats() && !isCollectRWStats) {
if (statsCollectRawDataSize) {
SerDeStats stats = serializer.getSerDeStats();
if (stats != null) {
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out?rev=1620938&r1=1620937&r2=1620938&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out Wed Aug 27 17:00:57 2014
@@ -65,28 +65,28 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_part
- Statistics: Num rows: 200 Data size: 4068 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 41576 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cdouble + 2) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 4068 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 41576 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Statistics: Num rows: 200 Data size: 4068 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 41576 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 4068 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 41576 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out?rev=1620938&r1=1620937&r2=1620938&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out Wed Aug 27 17:00:57 2014
@@ -106,15 +106,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_string
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
Execution mode: vectorized
Reducer 2
@@ -122,10 +122,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -249,15 +249,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_string
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
Execution mode: vectorized
Reducer 2
@@ -265,10 +265,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -392,15 +392,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_string
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean)
Execution mode: vectorized
Reducer 2
@@ -408,10 +408,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 40 Data size: 622 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -539,15 +539,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_wrong
- Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
- Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
Execution mode: vectorized
Reducer 2
@@ -555,10 +555,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 698 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat