svn commit: r1425589 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/plan/
test/queries/clientpositive/ test/results/clientpositive/
Author: namit
Date: Mon Dec 24 05:35:06 2012
New Revision: 1425589
URL: http://svn.apache.org/viewvc?rev=1425589&view=rev
Log:
HIVE-3832 Insert overwrite doesn't create a dir if the skewed column position doesn't match
(Gang Tim Liu via namit)
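
For context: the bug bites when a skewed column's position in the table
schema differs from its position in the skewed-column list. With a table
(key String, value String) skewed by (value), "value" sits at table
position 1 but skewed-list position 0; the old code reused a single index
for both lists, read the wrong row field, and the resulting candidate never
matched a declared skewed value, so the skewed subdirectory was never
created. Below is a minimal, self-contained sketch of the corrected
two-position lookup; SkewedPositionDemo and its values are illustrative
stand-ins, not the operator's actual plumbing.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Table columns: (key, value); skewed by (value).
    public class SkewedPositionDemo {
      public static void main(String[] args) {
        List<String> row = Arrays.asList("466", "val_466");  // key, value
        // (tblColPosition, skewColPosition), as in SkewedColumnPositionPair
        int[][] pairs = { {1, 0} };
        List<String> candidate = new ArrayList<String>(pairs.length);
        for (int[] p : pairs) {
          // read at the table position, write at the skewed-list position
          candidate.add(p[1], row.get(p[0]));
        }
        System.out.println(candidate);  // [val_466] -- matches the skewed value
      }
    }

With the old single-index scheme the loop would have read row.get(0)
("466") instead, which matches none of the declared skewed values.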
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java
hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q
hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q
hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1425589&r1=1425588&r2=1425589&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Mon Dec 24 05:35:06 2012
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.ql.stats.StatsSetupConst;
@@ -719,8 +720,9 @@ public class FileSinkOperator extends Te
"The row has less number of columns than no. of skewed column.";
skewedValsCandidate = new ArrayList<String>(skewedCols.size());
- for (int index : lbCtx.getRowSkewedIndex()) {
- skewedValsCandidate.add(index, standObjs.get(index).toString());
+ for (SkewedColumnPositionPair posPair : lbCtx.getRowSkewedIndex()) {
+ skewedValsCandidate.add(posPair.getSkewColPosition(),
+ standObjs.get(posPair.getTblColPosition()).toString());
}
/* The row matches skewed column names. */
if (allSkewedVals.contains(skewedValsCandidate)) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java?rev=1425589&r1=1425588&r2=1425589&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ListBucketingCtx.java Mon Dec 24 05:35:06 2012
@@ -40,14 +40,14 @@ public class ListBucketingCtx implements
private List<String> skewedColNames;
private List<List<String>> skewedColValues;
private Map<List<String>, String> lbLocationMap;
- private List<Integer> rowSkewedIndex;
+ private List<SkewedColumnPositionPair> rowSkewedIndex;
private boolean isStoredAsSubDirectories;
private String defaultKey;
private String defaultDirName;
private List<String> skewedValuesDirNames;
public ListBucketingCtx() {
- rowSkewedIndex = new ArrayList<Integer>();
+ rowSkewedIndex = new ArrayList<SkewedColumnPositionPair>();
skewedValuesDirNames = new ArrayList<String>();
}
@@ -109,7 +109,8 @@ public class ListBucketingCtx implements
int index = this.skewedColNames.indexOf(cols.get(i).getInternalName());
if (index > -1) {
hitNo++;
- rowSkewedIndex.add(index);
+ SkewedColumnPositionPair pair = new SkewedColumnPositionPair(i, index);
+ rowSkewedIndex.add(pair);
}
}
assert (hitNo == this.skewedColNames.size()) : "RowSchema doesn't have all skewed columns."
@@ -139,20 +140,6 @@ public class ListBucketingCtx implements
}
/**
- * @return the rowSkewedIndex
- */
- public List<Integer> getRowSkewedIndex() {
- return rowSkewedIndex;
- }
-
- /**
- * @param rowSkewedIndex the rowSkewedIndex to set
- */
- public void setRowSkewedIndex(List<Integer> rowSkewedIndex) {
- this.rowSkewedIndex = rowSkewedIndex;
- }
-
- /**
* @return the isStoredAsSubDirectories
*/
public boolean isStoredAsSubDirectories() {
@@ -235,4 +222,20 @@ public class ListBucketingCtx implements
public void setSkewedValuesDirNames(List<String> skewedValuesDirNames) {
this.skewedValuesDirNames = skewedValuesDirNames;
}
+
+ /**
+ * @return the rowSkewedIndex
+ */
+ public List<SkewedColumnPositionPair> getRowSkewedIndex() {
+ return rowSkewedIndex;
+ }
+
+ /**
+ * @param rowSkewedIndex the rowSkewedIndex to set
+ */
+ public void setRowSkewedIndex(List<SkewedColumnPositionPair> rowSkewedIndex) {
+ this.rowSkewedIndex = rowSkewedIndex;
+ }
}
+
+
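
The hunk above builds one SkewedColumnPositionPair per skewed column by
walking the row schema: i is the column's position in the table, and
indexOf gives its position in the skewed-column list. A standalone sketch
of that construction (BuildPairsDemo is hypothetical; the column names are
borrowed from the list_bucket_dml_12 test below):

    import java.util.Arrays;
    import java.util.List;

    public class BuildPairsDemo {
      public static void main(String[] args) {
        List<String> rowSchemaCols =
            Arrays.asList("col1", "col2", "col3", "col4", "col5");
        List<String> skewedColNames = Arrays.asList("col2", "col4");
        for (int i = 0; i < rowSchemaCols.size(); i++) {
          int index = skewedColNames.indexOf(rowSchemaCols.get(i));
          if (index > -1) {
            System.out.println(rowSchemaCols.get(i)
                + ": tblColPosition=" + i + ", skewColPosition=" + index);
          }
        }
        // prints: col2: tblColPosition=1, skewColPosition=0
        //         col4: tblColPosition=3, skewColPosition=1
      }
    }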
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SkewedColumnPositionPair.java Mon Dec 24 05:35:06 2012
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+/**
+ * This class records two positions for a skewed column:
+ * 1. its position in the table column list
+ * 2. its position in the skewed column list
+ * Both positions start from 0.
+ * For example, create a table with
+ * create table list_bucketing_static_part (key String, value String)
+ * partitioned by (ds String, hr String)
+ * skewed by (value) on ('val_466','val_287','val_82')
+ * stored as DIRECTORIES
+ * STORED AS RCFILE;
+ *
+ * The skewed column is "value".
+ * 1. Its position in the table column list is 1.
+ * 2. Its position in the skewed column list is 0.
+ *
+ * This information is used by {@link FileSinkOperator} in generateListBucketingDirName.
+ */
+public class SkewedColumnPositionPair {
+ private int tblColPosition;
+ private int skewColPosition;
+
+ public SkewedColumnPositionPair() {}
+
+ public SkewedColumnPositionPair(int tblColPosition, int skewColPosition) {
+ this.tblColPosition = tblColPosition;
+ this.skewColPosition = skewColPosition;
+ }
+
+ /**
+ * @return the tblColPosition
+ */
+ public int getTblColPosition() {
+ return tblColPosition;
+ }
+
+ /**
+ * @param tblColPosition the tblColPosition to set
+ */
+ public void setTblColPosition(int tblColPosition) {
+ this.tblColPosition = tblColPosition;
+ }
+
+ /**
+ * @return the skewColPosition
+ */
+ public int getSkewColPosition() {
+ return skewColPosition;
+ }
+
+ /**
+ * @param skewColPosition the skewColPosition to set
+ */
+ public void setSkewColPosition(int skewColPosition) {
+ this.skewColPosition = skewColPosition;
+ }
+
+}
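
For completeness, a hypothetical driver (PairDemo, not part of this commit;
assumes the hive-exec classes above are on the classpath) showing the pairs
implied by the two-skewed-column table in list_bucket_dml_12.q:

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair;

    public class PairDemo {
      public static void main(String[] args) {
        // col1..col5 occupy table positions 0..4; skewed by (col2, col4)
        List<SkewedColumnPositionPair> rowSkewedIndex =
            new ArrayList<SkewedColumnPositionPair>();
        rowSkewedIndex.add(new SkewedColumnPositionPair(1, 0));  // col2
        rowSkewedIndex.add(new SkewedColumnPositionPair(3, 1));  // col4
        for (SkewedColumnPositionPair p : rowSkewedIndex) {
          System.out.println("table pos " + p.getTblColPosition()
              + " -> skewed-list pos " + p.getSkewColPosition());
        }
      }
    }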
Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_11.q Mon Dec 24 05:35:06 2012
@@ -0,0 +1,36 @@
+set hive.mapred.supports.subdirectories=true;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Ensure it works if the skewed column is not the first column in the table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. single skewed column.
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String)
+ partitioned by (ds String, hr String)
+ skewed by (value) on ('val_466','val_287','val_82')
+ stored as DIRECTORIES
+ STORED AS RCFILE;
+
+-- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src;
+
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src;
+
+-- check DML result
+show partitions list_bucketing_static_part;
+desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11');
+
+set hive.optimize.listbucketing=true;
+explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466";
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466";
+
+drop table list_bucketing_static_part;
Added: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_12.q Mon Dec 24 05:35:06 2012
@@ -0,0 +1,42 @@
+set hive.mapred.supports.subdirectories=true;
+set mapred.input.dir.recursive=true;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+-- Ensure it works if the skewed column is not the first column in the table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- test where there is more than one skewed column, e.g. columns no. 2 and 4 in a table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string)
+ partitioned by (ds String, hr String)
+ skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
+ stored as DIRECTORIES
+ STORED AS RCFILE;
+
+-- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src;
+
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src;
+
+-- check DML result
+show partitions list_bucketing_mul_col;
+desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11');
+
+set hive.optimize.listbucketing=true;
+explain extended
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466";
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466";
+
+explain extended
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382";
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382";
+
+drop table list_bucketing_mul_col;
Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out Mon Dec 24 05:35:06 2012
@@ -0,0 +1,367 @@
+PREHOOK: query: -- Ensure it works if the skewed column is not the first column in the table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. single skewed column.
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String)
+ partitioned by (ds String, hr String)
+ skewed by (value) on ('val_466','val_287','val_82')
+ stored as DIRECTORIES
+ STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Ensure it works if the skewed column is not the first column in the table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- list bucketing DML: static partition. single skewed column.
+
+-- create a skewed table
+create table list_bucketing_static_part (key String, value String)
+ partitioned by (ds String, hr String)
+ skewed by (value) on ('val_466','val_287','val_82')
+ stored as DIRECTORIES
+ STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucketing_static_part
+PREHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML without merge. use bucketize to generate a few small files.
+explain extended
+insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME list_bucketing_static_part) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ GatherStats: false
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Static Partition Specification: ds=2008-04-08/hr=11/
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_static_part
+ partition_columns ds/hr
+ serialization.ddl struct list_bucketing_static_part { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_static_part
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ hr 11
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_static_part
+ partition_columns ds/hr
+ serialization.ddl struct list_bucketing_static_part { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_static_part
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+
+PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+POSTHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
+select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- check DML result
+show partitions list_bucketing_static_part
+PREHOOK: type: SHOWPARTITIONS
+POSTHOOK: query: -- check DML result
+show partitions list_bucketing_static_part
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ds=2008-04-08/hr=11
+PREHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted list_bucketing_static_part partition (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: list_bucketing_static_part
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 500
+ rawDataSize 4812
+ totalSize 5522
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Stored As SubDirectories: Yes
+Skewed Columns: [value]
+Skewed Values: [[val_466], [val_287], [val_82]]
+#### A masked pattern was here ####
+Skewed Value to Truncated Path: {[val_82]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_82, [val_287]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_287, [val_466]=/list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466}
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME list_bucketing_static_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')) (= (TOK_TABLE_OR_COL value) "val_466")))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ list_bucketing_static_part
+ TableScan
+ alias: list_bucketing_static_part
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (value = 'val_466')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: value=val_466
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_static_part
+ numFiles 4
+ numPartitions 1
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 4812
+ serialization.ddl struct list_bucketing_static_part { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ totalSize 5522
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_static_part
+ numFiles 4
+ numPartitions 1
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 4812
+ serialization.ddl struct list_bucketing_static_part { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ totalSize 5522
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_static_part
+ name: default.list_bucketing_static_part
+ Truncated Path -> Alias:
+ /list_bucketing_static_part/ds=2008-04-08/hr=11/value=val_466 [list_bucketing_static_part]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from list_bucketing_static_part where ds='2008-04-08' and hr='11' and value = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@list_bucketing_static_part@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+466 val_466
+466 val_466
+466 val_466
+PREHOOK: query: drop table list_bucketing_static_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucketing_static_part
+PREHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: query: drop table list_bucketing_static_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucketing_static_part
+POSTHOOK: Output: default@list_bucketing_static_part
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_static_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
Added: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out?rev=1425589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out Mon Dec 24 05:35:06 2012
@@ -0,0 +1,560 @@
+PREHOOK: query: -- Ensure it works if the skewed column is not the first column in the table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- test where there is more than one skewed column, e.g. columns no. 2 and 4 in a table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string)
+ partitioned by (ds String, hr String)
+ skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
+ stored as DIRECTORIES
+ STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Ensure it works if the skewed column is not the first column in the table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+-- test where there is more than one skewed column, e.g. columns no. 2 and 4 in a table with 5 columns
+create table list_bucketing_mul_col (col1 String, col2 String, col3 String, col4 String, col5 string)
+ partitioned by (ds String, hr String)
+ skewed by (col2, col4) on (('466','val_466'),('287','val_287'),('82','val_82'))
+ stored as DIRECTORIES
+ STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@list_bucketing_mul_col
+PREHOOK: query: -- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- list bucketing DML
+explain extended
+insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME list_bucketing_mul_col) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '11')))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR 1))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ GatherStats: false
+ Select Operator
+ expressions:
+ expr: 1
+ type: int
+ expr: key
+ type: string
+ expr: 1
+ type: int
+ expr: value
+ type: string
+ expr: 1
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: UDFToString(_col0)
+ type: string
+ expr: _col1
+ type: string
+ expr: UDFToString(_col2)
+ type: string
+ expr: _col3
+ type: string
+ expr: UDFToString(_col4)
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Static Partition Specification: ds=2008-04-08/hr=11/
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns col1,col2,col3,col4,col5
+ columns.types string:string:string:string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_mul_col
+ partition_columns ds/hr
+ serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_mul_col
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: src
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.src
+ numFiles 1
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct src { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src
+ name: default.src
+ Truncated Path -> Alias:
+ /src [src]
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ hr 11
+ replace: true
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns col1,col2,col3,col4,col5
+ columns.types string:string:string:string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_mul_col
+ partition_columns ds/hr
+ serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_mul_col
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+#### A masked pattern was here ####
+
+
+PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+POSTHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11')
+select 1, key, 1, value, 1 from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+PREHOOK: query: -- check DML result
+show partitions list_bucketing_mul_col
+PREHOOK: type: SHOWPARTITIONS
+POSTHOOK: query: -- check DML result
+show partitions list_bucketing_mul_col
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+ds=2008-04-08/hr=11
+PREHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: desc formatted list_bucketing_mul_col partition (ds='2008-04-08', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+# col_name data_type comment
+
+col1 string None
+col2 string None
+col3 string None
+col4 string None
+col5 string None
+
+# Partition Information
+# col_name data_type comment
+
+ds string None
+hr string None
+
+# Detailed Partition Information
+Partition Value: [2008-04-08, 11]
+Database: default
+Table: list_bucketing_mul_col
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ numFiles 4
+ numRows 500
+ rawDataSize 6312
+ totalSize 7094
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Stored As SubDirectories: Yes
+Skewed Columns: [col2, col4]
+Skewed Values: [[466, val_466], [287, val_287], [82, val_82]]
+#### A masked pattern was here ####
+Skewed Value to Truncated Path: {[82, val_82]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=82/col4=val_82, [466, val_466]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466, [287, val_287]=/list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=287/col4=val_287}
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: explain extended
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME list_bucketing_mul_col))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')) (= (TOK_TABLE_OR_COL col2) "466")) (= (TOK_TABLE_OR_COL col4) "val_466")))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ list_bucketing_mul_col
+ TableScan
+ alias: list_bucketing_mul_col
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((col2 = '466') and (col4 = 'val_466'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: col1
+ type: string
+ expr: col2
+ type: string
+ expr: col3
+ type: string
+ expr: col4
+ type: string
+ expr: col5
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+ columns.types string:string:string:string:string:string:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: col4=val_466
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ bucket_count -1
+ columns col1,col2,col3,col4,col5
+ columns.types string:string:string:string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_mul_col
+ numFiles 4
+ numPartitions 1
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 6312
+ serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ totalSize 7094
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns col1,col2,col3,col4,col5
+ columns.types string:string:string:string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_mul_col
+ numFiles 4
+ numPartitions 1
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 6312
+ serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ totalSize 7094
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_mul_col
+ name: default.list_bucketing_mul_col
+ Truncated Path -> Alias:
+ /list_bucketing_mul_col/ds=2008-04-08/hr=11/col2=466/col4=val_466 [list_bucketing_mul_col]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "466" and col4 = "val_466"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+1 466 1 val_466 1 2008-04-08 11
+1 466 1 val_466 1 2008-04-08 11
+1 466 1 val_466 1 2008-04-08 11
+PREHOOK: query: explain extended
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME list_bucketing_mul_col))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (and (= (TOK_TABLE_OR_COL ds) '2008-04-08') (= (TOK_TABLE_OR_COL hr) '11')) (= (TOK_TABLE_OR_COL col2) "382")) (= (TOK_TABLE_OR_COL col4) "val_382")))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ list_bucketing_mul_col
+ TableScan
+ alias: list_bucketing_mul_col
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((col2 = '382') and (col4 = 'val_382'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: col1
+ type: string
+ expr: col2
+ type: string
+ expr: col3
+ type: string
+ expr: col4
+ type: string
+ expr: col5
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+ columns.types string:string:string:string:string:string:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ bucket_count -1
+ columns col1,col2,col3,col4,col5
+ columns.types string:string:string:string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_mul_col
+ numFiles 4
+ numPartitions 1
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 6312
+ serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ totalSize 7094
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ properties:
+ bucket_count -1
+ columns col1,col2,col3,col4,col5
+ columns.types string:string:string:string:string
+#### A masked pattern was here ####
+ name default.list_bucketing_mul_col
+ numFiles 4
+ numPartitions 1
+ numRows 500
+ partition_columns ds/hr
+ rawDataSize 6312
+ serialization.ddl struct list_bucketing_mul_col { string col1, string col2, string col3, string col4, string col5}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ totalSize 7094
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.list_bucketing_mul_col
+ name: default.list_bucketing_mul_col
+ Truncated Path -> Alias:
+ /list_bucketing_mul_col/ds=2008-04-08/hr=11/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [list_bucketing_mul_col]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: query: select * from list_bucketing_mul_col
+where ds='2008-04-08' and hr='11' and col2 = "382" and col4 = "val_382"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@list_bucketing_mul_col@ds=2008-04-08/hr=11
+#### A masked pattern was here ####
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []
+1 382 1 val_382 1 2008-04-08 11
+1 382 1 val_382 1 2008-04-08 11
+PREHOOK: query: drop table list_bucketing_mul_col
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@list_bucketing_mul_col
+PREHOOK: Output: default@list_bucketing_mul_col
+POSTHOOK: query: drop table list_bucketing_mul_col
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@list_bucketing_mul_col
+POSTHOOK: Output: default@list_bucketing_mul_col
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col1 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col2 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col3 EXPRESSION []
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: list_bucketing_mul_col PARTITION(ds=2008-04-08,hr=11).col5 EXPRESSION []