You are viewing a plain text version of this content; the hyperlink to the canonical HTML version was removed during plain-text conversion and is available from the mailing-list archive.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/09/30 18:08:21 UTC
[hive] branch master updated: HIVE-22232: NPE when
hive.order.columnalignment is set to false (Jesus Camacho Rodriguez,
reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new a832b40 HIVE-22232: NPE when hive.order.columnalignment is set to false (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
a832b40 is described below
commit a832b403873d7ffd73dccb90ad92762614be9482
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Sep 23 22:00:08 2019 -0700
HIVE-22232: NPE when hive.order.columnalignment is set to false (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
Close apache/hive#783
---
.../optimizer/calcite/translator/ASTConverter.java | 31 +-
.../queries/clientpositive/groupby_nocolumnalign.q | 17 +
.../clientpositive/groupby_nocolumnalign.q.out | 464 +++++++++++++++++++++
3 files changed, 502 insertions(+), 10 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 8382f25..6c4edeb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -160,16 +160,20 @@ public class ASTConverter {
}
HiveAggregate hiveAgg = (HiveAggregate) groupBy;
- for (int pos : hiveAgg.getAggregateColumnsOrder()) {
- RexInputRef iRef = new RexInputRef(groupBy.getGroupSet().nth(pos),
- groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
- b.add(iRef.accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder())));
- }
- for (int pos = 0; pos < groupBy.getGroupCount(); pos++) {
- if (!hiveAgg.getAggregateColumnsOrder().contains(pos)) {
- RexInputRef iRef = new RexInputRef(groupBy.getGroupSet().nth(pos),
- groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
- b.add(iRef.accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder())));
+ if (hiveAgg.getAggregateColumnsOrder() != null) {
+ // Aggregation columns may have been sorted in specific order
+ for (int pos : hiveAgg.getAggregateColumnsOrder()) {
+ addRefToBuilder(b, groupBy.getGroupSet().nth(pos));
+ }
+ for (int pos = 0; pos < groupBy.getGroupCount(); pos++) {
+ if (!hiveAgg.getAggregateColumnsOrder().contains(pos)) {
+ addRefToBuilder(b, groupBy.getGroupSet().nth(pos));
+ }
+ }
+ } else {
+ // Aggregation columns have not been reordered
+ for (int i : groupBy.getGroupSet()) {
+ addRefToBuilder(b, i);
}
}
@@ -257,6 +261,12 @@ public class ASTConverter {
return hiveAST.getAST();
}
+ private void addRefToBuilder(ASTBuilder b, int i) {
+ RexInputRef iRef = new RexInputRef(i,
+ root.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
+ b.add(iRef.accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder())));
+ }
+
private ASTNode buildUDTFAST(String functionName, List<ASTNode> children) {
ASTNode node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, functionName));
@@ -265,6 +275,7 @@ public class ASTConverter {
}
return node;
}
+
private void convertOrderLimitToASTNode(HiveSortLimit order) {
if (order != null) {
HiveSortLimit hiveSortLimit = order;
diff --git a/ql/src/test/queries/clientpositive/groupby_nocolumnalign.q b/ql/src/test/queries/clientpositive/groupby_nocolumnalign.q
new file mode 100644
index 0000000..8138b3c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_nocolumnalign.q
@@ -0,0 +1,17 @@
+--! qt:dataset:src
+-- SORT_QUERY_RESULTS
+
+set hive.order.columnalignment=false;
+
+CREATE TABLE DEST2_n31_2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST2_n31_2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST2_n31_2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value;
+
+SELECT DEST2_n31_2.* FROM DEST2_n31_2;
+
+DROP TABLE DEST2_n31_2;
diff --git a/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out b/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
new file mode 100644
index 0000000..eefc9b1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
@@ -0,0 +1,464 @@
+PREHOOK: query: CREATE TABLE DEST2_n31_2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@DEST2_n31_2
+POSTHOOK: query: CREATE TABLE DEST2_n31_2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@DEST2_n31_2
+PREHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST2_n31_2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest2_n31_2
+POSTHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST2_n31_2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest2_n31_2
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0, Stage-3
+ Stage-3 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string), substr(value, 5) (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(DISTINCT _col2)
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col2:0._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2_n31_2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, val1, val2
+ Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2_n31_2
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, val1, val2
+ Column Types: int, string, string
+ Table: default.dest2_n31_2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST2_n31_2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest2_n31_2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST2_n31_2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest2_n31_2
+POSTHOOK: Lineage: dest2_n31_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2_n31_2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2_n31_2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT DEST2_n31_2.* FROM DEST2_n31_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2_n31_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DEST2_n31_2.* FROM DEST2_n31_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2_n31_2
+#### A masked pattern was here ####
+0 val_0 1
+10 val_10 1
+100 val_100 1
+103 val_103 1
+104 val_104 1
+105 val_105 1
+11 val_11 1
+111 val_111 1
+113 val_113 1
+114 val_114 1
+116 val_116 1
+118 val_118 1
+119 val_119 1
+12 val_12 1
+120 val_120 1
+125 val_125 1
+126 val_126 1
+128 val_128 1
+129 val_129 1
+131 val_131 1
+133 val_133 1
+134 val_134 1
+136 val_136 1
+137 val_137 1
+138 val_138 1
+143 val_143 1
+145 val_145 1
+146 val_146 1
+149 val_149 1
+15 val_15 1
+150 val_150 1
+152 val_152 1
+153 val_153 1
+155 val_155 1
+156 val_156 1
+157 val_157 1
+158 val_158 1
+160 val_160 1
+162 val_162 1
+163 val_163 1
+164 val_164 1
+165 val_165 1
+166 val_166 1
+167 val_167 1
+168 val_168 1
+169 val_169 1
+17 val_17 1
+170 val_170 1
+172 val_172 1
+174 val_174 1
+175 val_175 1
+176 val_176 1
+177 val_177 1
+178 val_178 1
+179 val_179 1
+18 val_18 1
+180 val_180 1
+181 val_181 1
+183 val_183 1
+186 val_186 1
+187 val_187 1
+189 val_189 1
+19 val_19 1
+190 val_190 1
+191 val_191 1
+192 val_192 1
+193 val_193 1
+194 val_194 1
+195 val_195 1
+196 val_196 1
+197 val_197 1
+199 val_199 1
+2 val_2 1
+20 val_20 1
+200 val_200 1
+201 val_201 1
+202 val_202 1
+203 val_203 1
+205 val_205 1
+207 val_207 1
+208 val_208 1
+209 val_209 1
+213 val_213 1
+214 val_214 1
+216 val_216 1
+217 val_217 1
+218 val_218 1
+219 val_219 1
+221 val_221 1
+222 val_222 1
+223 val_223 1
+224 val_224 1
+226 val_226 1
+228 val_228 1
+229 val_229 1
+230 val_230 1
+233 val_233 1
+235 val_235 1
+237 val_237 1
+238 val_238 1
+239 val_239 1
+24 val_24 1
+241 val_241 1
+242 val_242 1
+244 val_244 1
+247 val_247 1
+248 val_248 1
+249 val_249 1
+252 val_252 1
+255 val_255 1
+256 val_256 1
+257 val_257 1
+258 val_258 1
+26 val_26 1
+260 val_260 1
+262 val_262 1
+263 val_263 1
+265 val_265 1
+266 val_266 1
+27 val_27 1
+272 val_272 1
+273 val_273 1
+274 val_274 1
+275 val_275 1
+277 val_277 1
+278 val_278 1
+28 val_28 1
+280 val_280 1
+281 val_281 1
+282 val_282 1
+283 val_283 1
+284 val_284 1
+285 val_285 1
+286 val_286 1
+287 val_287 1
+288 val_288 1
+289 val_289 1
+291 val_291 1
+292 val_292 1
+296 val_296 1
+298 val_298 1
+30 val_30 1
+302 val_302 1
+305 val_305 1
+306 val_306 1
+307 val_307 1
+308 val_308 1
+309 val_309 1
+310 val_310 1
+311 val_311 1
+315 val_315 1
+316 val_316 1
+317 val_317 1
+318 val_318 1
+321 val_321 1
+322 val_322 1
+323 val_323 1
+325 val_325 1
+327 val_327 1
+33 val_33 1
+331 val_331 1
+332 val_332 1
+333 val_333 1
+335 val_335 1
+336 val_336 1
+338 val_338 1
+339 val_339 1
+34 val_34 1
+341 val_341 1
+342 val_342 1
+344 val_344 1
+345 val_345 1
+348 val_348 1
+35 val_35 1
+351 val_351 1
+353 val_353 1
+356 val_356 1
+360 val_360 1
+362 val_362 1
+364 val_364 1
+365 val_365 1
+366 val_366 1
+367 val_367 1
+368 val_368 1
+369 val_369 1
+37 val_37 1
+373 val_373 1
+374 val_374 1
+375 val_375 1
+377 val_377 1
+378 val_378 1
+379 val_379 1
+382 val_382 1
+384 val_384 1
+386 val_386 1
+389 val_389 1
+392 val_392 1
+393 val_393 1
+394 val_394 1
+395 val_395 1
+396 val_396 1
+397 val_397 1
+399 val_399 1
+4 val_4 1
+400 val_400 1
+401 val_401 1
+402 val_402 1
+403 val_403 1
+404 val_404 1
+406 val_406 1
+407 val_407 1
+409 val_409 1
+41 val_41 1
+411 val_411 1
+413 val_413 1
+414 val_414 1
+417 val_417 1
+418 val_418 1
+419 val_419 1
+42 val_42 1
+421 val_421 1
+424 val_424 1
+427 val_427 1
+429 val_429 1
+43 val_43 1
+430 val_430 1
+431 val_431 1
+432 val_432 1
+435 val_435 1
+436 val_436 1
+437 val_437 1
+438 val_438 1
+439 val_439 1
+44 val_44 1
+443 val_443 1
+444 val_444 1
+446 val_446 1
+448 val_448 1
+449 val_449 1
+452 val_452 1
+453 val_453 1
+454 val_454 1
+455 val_455 1
+457 val_457 1
+458 val_458 1
+459 val_459 1
+460 val_460 1
+462 val_462 1
+463 val_463 1
+466 val_466 1
+467 val_467 1
+468 val_468 1
+469 val_469 1
+47 val_47 1
+470 val_470 1
+472 val_472 1
+475 val_475 1
+477 val_477 1
+478 val_478 1
+479 val_479 1
+480 val_480 1
+481 val_481 1
+482 val_482 1
+483 val_483 1
+484 val_484 1
+485 val_485 1
+487 val_487 1
+489 val_489 1
+490 val_490 1
+491 val_491 1
+492 val_492 1
+493 val_493 1
+494 val_494 1
+495 val_495 1
+496 val_496 1
+497 val_497 1
+498 val_498 1
+5 val_5 1
+51 val_51 1
+53 val_53 1
+54 val_54 1
+57 val_57 1
+58 val_58 1
+64 val_64 1
+65 val_65 1
+66 val_66 1
+67 val_67 1
+69 val_69 1
+70 val_70 1
+72 val_72 1
+74 val_74 1
+76 val_76 1
+77 val_77 1
+78 val_78 1
+8 val_8 1
+80 val_80 1
+82 val_82 1
+83 val_83 1
+84 val_84 1
+85 val_85 1
+86 val_86 1
+87 val_87 1
+9 val_9 1
+90 val_90 1
+92 val_92 1
+95 val_95 1
+96 val_96 1
+97 val_97 1
+98 val_98 1
+PREHOOK: query: DROP TABLE DEST2_n31_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest2_n31_2
+PREHOOK: Output: default@dest2_n31_2
+POSTHOOK: query: DROP TABLE DEST2_n31_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest2_n31_2
+POSTHOOK: Output: default@dest2_n31_2