You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/12/10 13:14:49 UTC
svn commit: r1419365 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/io/
java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/
test/queries/clientpositive/ test/results/clientnegative/ t...
Author: namit
Date: Mon Dec 10 12:14:47 2012
New Revision: 1419365
URL: http://svn.apache.org/viewvc?rev=1419365&view=rev
Log:
HIVE-3401 Diversify grammar for split sampling
(Navis via namit)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
hive/trunk/ql/src/test/queries/clientpositive/split_sample.q
hive/trunk/ql/src/test/results/clientnegative/split_sample_out_of_range.q.out
hive/trunk/ql/src/test/results/clientnegative/split_sample_wrong_format.q.out
hive/trunk/ql/src/test/results/clientpositive/input4.q.out
hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch.q.out
hive/trunk/ql/src/test/results/clientpositive/plan_json.q.out
hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java Mon Dec 10 12:14:47 2012
@@ -299,7 +299,7 @@ public class ExplainTask extends Task<Ex
private static boolean isPrintable(Object val) {
if (val instanceof Boolean || val instanceof String
- || val instanceof Integer || val instanceof Byte
+ || val instanceof Integer || val instanceof Long || val instanceof Byte
|| val instanceof Float || val instanceof Double) {
return true;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Mon Dec 10 12:14:47 2012
@@ -440,7 +440,7 @@ public class FetchOperator implements Se
totalSize += split.getLength();
}
List<FetchInputFormatSplit> result = new ArrayList<FetchInputFormatSplit>();
- long targetSize = (long) (totalSize * splitSample.getPercent() / 100D);
+ long targetSize = splitSample.getTargetSize(totalSize);
int startIndex = splitSample.getSeedNum() % splits.length;
long size = 0;
for (int i = 0; i < splits.length; i++) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java Mon Dec 10 12:14:47 2012
@@ -482,8 +482,8 @@ public class MapRedTask extends ExecDriv
for (String alias : work.getAliasToWork().keySet()) {
if (work.getNameToSplitSample().containsKey(alias)) {
allSample = true;
- double rate = work.getNameToSplitSample().get(alias).getPercent();
- if (rate > highestSamplePercentage) {
+ Double rate = work.getNameToSplitSample().get(alias).getPercent();
+ if (rate != null && rate > highestSamplePercentage) {
highestSamplePercentage = rate;
}
} else {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Mon Dec 10 12:14:47 2012
@@ -27,7 +27,6 @@ import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -61,6 +60,9 @@ public class TableScanOperator extends O
private transient Stat currentStat;
private transient Map<String, Stat> stats;
+ private transient int rowLimit = -1;
+ private transient int currCount = 0;
+
public TableDesc getTableDesc() {
return tableDesc;
}
@@ -78,6 +80,10 @@ public class TableScanOperator extends O
**/
@Override
public void processOp(Object row, int tag) throws HiveException {
+ if (rowLimit >= 0 && currCount++ >= rowLimit) {
+ setDone(true);
+ return;
+ }
if (conf != null && conf.isGatherStats()) {
gatherStats(row);
}
@@ -170,6 +176,7 @@ public class TableScanOperator extends O
if (conf == null) {
return;
}
+ rowLimit = conf.getRowLimit();
if (!conf.isGatherStats()) {
return;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Mon Dec 10 12:14:47 2012
@@ -480,8 +480,10 @@ public class CombineHiveInputFormat<K ex
totalSize += split.getLength();
}
- long targetSize = (long) (totalSize * nameToSamples.get(entry.getKey()).getPercent() / 100D);
- int startIndex = nameToSamples.get(entry.getKey()).getSeedNum() % splitList.size();
+ SplitSample splitSample = nameToSamples.get(entry.getKey());
+
+ long targetSize = splitSample.getTargetSize(totalSize);
+ int startIndex = splitSample.getSeedNum() % splitList.size();
long size = 0;
for (int i = 0; i < splitList.size(); i++) {
InputSplitShim split = splitList.get((startIndex + i) % splitList.size());
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Mon Dec 10 12:14:47 2012
@@ -186,6 +186,9 @@ TOK_TABLELOCATION;
TOK_PARTITIONLOCATION;
TOK_TABLEBUCKETSAMPLE;
TOK_TABLESPLITSAMPLE;
+TOK_PERCENT;
+TOK_LENGTH;
+TOK_ROWCOUNT;
TOK_TMP_FILE;
TOK_TABSORTCOLNAMEASC;
TOK_TABSORTCOLNAMEDESC;
@@ -1810,7 +1813,12 @@ splitSample
@init { msgs.push("table split sample specification"); }
@after { msgs.pop(); }
:
- KW_TABLESAMPLE LPAREN (numerator=Number) KW_PERCENT RPAREN -> ^(TOK_TABLESPLITSAMPLE $numerator)
+ KW_TABLESAMPLE LPAREN (numerator=Number) (percent=KW_PERCENT|KW_ROWS) RPAREN
+ -> {percent != null}? ^(TOK_TABLESPLITSAMPLE TOK_PERCENT $numerator)
+ -> ^(TOK_TABLESPLITSAMPLE TOK_ROWCOUNT $numerator)
+ |
+ KW_TABLESAMPLE LPAREN (numerator=ByteLengthLiteral) RPAREN
+ -> ^(TOK_TABLESPLITSAMPLE TOK_LENGTH $numerator)
;
tableSample
@@ -2426,6 +2434,7 @@ KW_SORTED: 'SORTED';
KW_INTO: 'INTO';
KW_BUCKETS: 'BUCKETS';
KW_ROW: 'ROW';
+KW_ROWS: 'ROWS';
KW_FORMAT: 'FORMAT';
KW_DELIMITED: 'DELIMITED';
KW_FIELDS: 'FIELDS';
@@ -2647,6 +2656,11 @@ TinyintLiteral
(Digit)+ 'Y'
;
+ByteLengthLiteral
+ :
+ (Digit)+ ('b' | 'B' | 'k' | 'K' | 'm' | 'M' | 'g' | 'G')
+ ;
+
Number
:
(Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java Mon Dec 10 12:14:47 2012
@@ -438,6 +438,7 @@ public class ParseDriver {
try {
r = parser.statement();
} catch (RecognitionException e) {
+ e.printStackTrace();
throw new ParseException(parser.getErrors());
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Dec 10 12:14:47 2012
@@ -514,14 +514,36 @@ public class SemanticAnalyzer extends Ba
}
ASTNode sampleClause = (ASTNode) tabref.getChild(1);
String alias_id = getAliasId(alias, qb);
- String strPercentage = unescapeIdentifier(sampleClause.getChild(0).getText());
- Double percent = Double.valueOf(strPercentage).doubleValue();
- if (percent < 0 || percent > 100) {
- throw new SemanticException(generateErrorMessage(sampleClause,
- "Sampling percentage should be between 0 and 100"));
+
+ Tree type = sampleClause.getChild(0);
+ String numerator = unescapeIdentifier(sampleClause.getChild(1).getText());
+
+ SplitSample sample;
+ if (type.getType() == HiveParser.TOK_PERCENT) {
+ Double percent = Double.valueOf(numerator).doubleValue();
+ if (percent < 0 || percent > 100) {
+ throw new SemanticException(generateErrorMessage(sampleClause,
+ "Sampling percentage should be between 0 and 100"));
+ }
+ int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
+ sample = new SplitSample(percent, seedNum);
+ } else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
+ sample = new SplitSample(Integer.valueOf(numerator));
+ } else {
+ assert type.getType() == HiveParser.TOK_LENGTH;
+ long length = Integer.valueOf(numerator.substring(0, numerator.length() - 1));
+ char last = numerator.charAt(numerator.length() - 1);
+ if (last == 'k' || last == 'K') {
+ length <<= 10;
+ } else if (last == 'm' || last == 'M') {
+ length <<= 20;
+ } else if (last == 'g' || last == 'G') {
+ length <<= 30;
+ }
+ int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
+ sample = new SplitSample(length, seedNum);
}
- nameToSplitSample.put(alias_id, new SplitSample(
- percent, conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM)));
+ nameToSplitSample.put(alias_id, sample);
}
// Insert this map into the stats
qb.setTabAlias(alias, tabIdName);
@@ -7204,6 +7226,12 @@ public class SemanticAnalyzer extends Ba
TableScanDesc tsDesc = new TableScanDesc(alias, vcList);
setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);
+ SplitSample sample = nameToSplitSample.get(alias);
+ if (sample != null && sample.getRowCount() != null) {
+ tsDesc.setRowLimit(sample.getRowCount());
+ nameToSplitSample.remove(alias);
+ }
+
top = putOpInsertMap(OperatorFactory.get(tsDesc,
new RowSchema(rwsch.getColumnInfos())), rwsch);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java Mon Dec 10 12:14:47 2012
@@ -26,7 +26,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
/**
*
- * This class stores all the information specified in the TABLESAMPLE(PERCENT ...) clause.
+ * This class stores all the information specified in the TABLESAMPLE(...) clause.
* e.g. for the clause "FROM t TABLESAMPLE(1 PERCENT)" it will store the percentage 1,
* and the seed number is to determine which 1%. Currently it is from the conf
* hive.sample.seednumber
@@ -36,10 +36,10 @@ public class SplitSample implements Seri
private static final long serialVersionUID = 1L;
- /**
- * The percentage of the TABLESAMPLE clause.
- */
- private double percent;
+ // only one of the fields below is non-null
+ private Long totalLength; // total length of sample, prunes splits exceeded
+ private Double percent; // percent to total input, prunes splits exceeded
+ private Integer rowCount; // row count per split, do not prune splits
/**
* The number used to determine which part of the input to sample
@@ -49,21 +49,47 @@ public class SplitSample implements Seri
public SplitSample() {
}
-
public SplitSample(double percent, int seedNum) {
this.percent = percent;
this.seedNum = seedNum;
}
+ public SplitSample(long totalLength, int seedNum) {
+ this.totalLength = totalLength;
+ this.seedNum = seedNum;
+ }
+
+ public SplitSample(int rowCount) {
+ this.rowCount = rowCount;
+ }
+
@Explain(displayName = "percentage")
- public double getPercent() {
+ public Double getPercent() {
return percent;
}
- public void setPercent(double percent) {
+ public void setPercent(Double percent) {
this.percent = percent;
}
+ @Explain(displayName = "total length")
+ public Long getTotalLength() {
+ return totalLength;
+ }
+
+ public void setTotalLength(Long totalLength) {
+ this.totalLength = totalLength;
+ }
+
+ @Explain(displayName = "row count")
+ public Integer getRowCount() {
+ return rowCount;
+ }
+
+ public void setRowCount(Integer rowCount) {
+ this.rowCount = rowCount;
+ }
+
@Explain(displayName = "seed number")
public int getSeedNum() {
return seedNum;
@@ -73,4 +99,7 @@ public class SplitSample implements Seri
this.seedNum = seedNum;
}
+ public long getTargetSize(long totalSize) {
+ return totalLength != null ? totalLength : (long) (totalSize * percent / 100D);
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java Mon Dec 10 12:14:47 2012
@@ -242,7 +242,7 @@ public class MapredWork extends Abstract
return reducer;
}
- @Explain(displayName = "Percentage Sample")
+ @Explain(displayName = "Split Sample")
public HashMap<String, SplitSample> getNameToSplitSample() {
return nameToSplitSample;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java Mon Dec 10 12:14:47 2012
@@ -44,6 +44,14 @@ public class TableScanDesc extends Abstr
private List<String> partColumns;
/**
+ * Used for split sampling (row count per split)
+ * For example,
+ * select count(1) from ss_src2 tablesample(10 ROWS);
+ * provides first 10 rows from all input splits
+ */
+ private int rowLimit = -1;
+
+ /**
* A boolean variable set to true by the semantic analyzer only in case of the analyze command.
*
*/
@@ -149,4 +157,17 @@ public class TableScanDesc extends Abstr
public void setMaxStatsKeyPrefixLength(int maxStatsKeyPrefixLength) {
this.maxStatsKeyPrefixLength = maxStatsKeyPrefixLength;
}
+
+ public void setRowLimit(int rowLimit) {
+ this.rowLimit = rowLimit;
+ }
+
+ public int getRowLimit() {
+ return rowLimit;
+ }
+
+ @Explain(displayName = "Row Limit Per Split")
+ public Integer getRowLimitExplain() {
+ return rowLimit >= 0 ? rowLimit : null;
+ }
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/split_sample.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/split_sample.q?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/split_sample.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/split_sample.q Mon Dec 10 12:14:47 2012
@@ -78,3 +78,31 @@ set mapred.min.split.size.per.node=30000
set mapred.min.split.size.per.rack=300000;
select count(1) from ss_src2 tablesample(1 percent);
select count(1) from ss_src2 tablesample(50 percent);
+
+--HIVE-3401 more split samplings
+
+-- total length
+explain
+select count(1) from ss_src2 tablesample(100B);
+select count(1) from ss_src2 tablesample(100B);
+
+explain
+select count(1) from ss_src2 tablesample(1K);
+select count(1) from ss_src2 tablesample(1K);
+
+-- row per split
+explain
+select key, value from ss_src2 tablesample(0 ROWS);
+select key, value from ss_src2 tablesample(0 ROWS);
+
+explain
+select count(1) from ss_src2 tablesample(10 ROWS);
+select count(1) from ss_src2 tablesample(10 ROWS);
+
+explain
+select count(1) from ss_src2 tablesample(100 ROWS);
+select count(1) from ss_src2 tablesample(100 ROWS);
+
+set hive.fetch.task.conversion=more;
+select key from ss_src2 tablesample(200B);
+select key from ss_src2 tablesample(10 ROWS);
Modified: hive/trunk/ql/src/test/results/clientnegative/split_sample_out_of_range.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/split_sample_out_of_range.q.out?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/split_sample_out_of_range.q.out (original)
+++ hive/trunk/ql/src/test/results/clientnegative/split_sample_out_of_range.q.out Mon Dec 10 12:14:47 2012
@@ -1 +1 @@
-FAILED: SemanticException 3:32 Sampling percentage should be between 0 and 100. Error encountered near token '105'
+FAILED: SemanticException 0:0 Sampling percentage should be between 0 and 100. Error encountered near token '105'
Modified: hive/trunk/ql/src/test/results/clientnegative/split_sample_wrong_format.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/split_sample_wrong_format.q.out?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientnegative/split_sample_wrong_format.q.out (original)
+++ hive/trunk/ql/src/test/results/clientnegative/split_sample_wrong_format.q.out Mon Dec 10 12:14:47 2012
@@ -1 +1 @@
-FAILED: SemanticException 3:32 Percentage sampling is not supported in org.apache.hadoop.hive.ql.io.HiveInputFormat. Error encountered near token '1'
+FAILED: SemanticException 0:0 Percentage sampling is not supported in org.apache.hadoop.hive.ql.io.HiveInputFormat. Error encountered near token '1'
Modified: hive/trunk/ql/src/test/results/clientpositive/input4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input4.q.out?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input4.q.out Mon Dec 10 12:14:47 2012
@@ -48,7 +48,7 @@ PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN FORMATTED
SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
POSTHOOK: type: QUERY
-{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Alias -> Map Operator Tree:":{"input4":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}},"Percentage Sample:":{}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) KEY)))))"}
+{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Split Sample:":{},"Alias -> Map Operator Tree:":{"input4":{"TS_0":{"SEL_1":{"FS_2":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT4))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) VALUE)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT4) KEY)))))"}
PREHOOK: query: SELECT INPUT4.VALUE, INPUT4.KEY FROM INPUT4
PREHOOK: type: QUERY
PREHOOK: Input: default@input4
Modified: hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch.q.out?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nonmr_fetch.q.out Mon Dec 10 12:14:47 2012
@@ -231,7 +231,7 @@ POSTHOOK: query: -- negative, table samp
explain select * from src TABLESAMPLE (0.25 PERCENT) limit 10
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -258,7 +258,7 @@ STAGE PLANS:
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- Percentage Sample:
+ Split Sample:
src
percentage: 0.25
seed number: 0
@@ -802,7 +802,7 @@ POSTHOOK: query: -- split sampling
explain select * from src TABLESAMPLE (0.25 PERCENT)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -839,7 +839,7 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select *, BLOCK__OFFSET__INSIDE__FILE from srcpart TABLESAMPLE (0.25 PERCENT)
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLESPLITSAMPLE 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE)))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) (TOK_TABLESPLITSAMPLE TOK_PERCENT 0.25))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (TOK_TABLE_OR_COL BLOCK__OFFSET__INSIDE__FILE)))))
STAGE DEPENDENCIES:
Stage-0 is a root stage
Modified: hive/trunk/ql/src/test/results/clientpositive/plan_json.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/plan_json.q.out?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/plan_json.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/plan_json.q.out Mon Dec 10 12:14:47 2012
@@ -6,4 +6,4 @@ POSTHOOK: query: -- explain plan json:
EXPLAIN FORMATTED SELECT count(1) FROM src
POSTHOOK: type: QUERY
-{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"GBY_4":{"SEL_5":{"FS_6":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}},"Alias -> Map Operator Tree:":{"src":{"TS_0":{"SEL_1":{"GBY_2":{"RS_3":{"Reduce Output Operator":{"Map-reduce partition columns:":[],"sort order:":"","tag:":"-1","value expressions:":[{"type:":"bigint","expr:":"_col0"}],"key expressions:":[]}}}}}}},"Percentage Sample:":{}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))"}
+{"STAGE PLANS":{"Stage-1":{"Map Reduce":{"Reduce Operator Tree:":{"GBY_4":{"SEL_5":{"FS_6":{"File Output Operator":{"GlobalTableId:":"0","compressed:":"false","table:":{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"}}}}}},"Split Sample:":{},"Alias -> Map Operator Tree:":{"src":{"TS_0":{"SEL_1":{"GBY_2":{"RS_3":{"Reduce Output Operator":{"Map-reduce partition columns:":[],"sort order:":"","tag:":"-1","value expressions:":[{"type:":"bigint","expr:":"_col0"}],"key expressions:":[]}}}}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1"}}},"STAGE DEPENDENCIES":{"Stage-1":{"ROOT STAGE":"TRUE"},"Stage-0":{"ROOT STAGE":"TRUE"}},"ABSTRACT SYNTAX TREE":"(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))"}
Modified: hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out?rev=1419365&r1=1419364&r2=1419365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out Mon Dec 10 12:14:47 2012
@@ -120,7 +120,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -147,7 +147,7 @@ STAGE PLANS:
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- Percentage Sample:
+ Split Sample:
ss_src2
percentage: 1.0
seed number: 0
@@ -361,7 +361,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 70))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10)))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 70))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -397,7 +397,7 @@ STAGE PLANS:
value expressions:
expr: _col1
type: bigint
- Percentage Sample:
+ Split Sample:
ss_src2
percentage: 70.0
seed number: 5
@@ -489,7 +489,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -514,7 +514,7 @@ STAGE PLANS:
value expressions:
expr: _col0
type: int
- Percentage Sample:
+ Split Sample:
subq:ss_src2
percentage: 1.0
seed number: 5
@@ -4049,7 +4049,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME ss_src1) (TOK_TABLESPLITSAMPLE 80) t1) (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 2) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key) k)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (TOK_FUNCTION in (TOK_TABLE_OR_COL k) 199 10199 20199) (TOK_FUNCTION in (TOK_TABLE_OR_COL k1) 199 10199 20199)))))
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME ss_src1) (TOK_TABLESPLITSAMPLE TOK_PERCENT 80) t1) (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 2) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL t2) key) k)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (TOK_FUNCTION in (TOK_TABLE_OR_COL k) 199 10199 20199) (TOK_FUNCTION in (TOK_TABLE_OR_COL k1) 199 10199 20199)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -4089,7 +4089,7 @@ STAGE PLANS:
value expressions:
expr: key
type: int
- Percentage Sample:
+ Split Sample:
subq:t1
percentage: 80.0
seed number: 5
@@ -4207,7 +4207,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_PERCENT 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -4233,7 +4233,7 @@ STAGE PLANS:
value expressions:
expr: _col0
type: bigint
- Percentage Sample:
+ Split Sample:
ss_src2
percentage: 1.0
seed number: 5
@@ -4303,3 +4303,521 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
1000
+PREHOOK: query: --HIVE-3401 more split samplings
+
+-- total length
+explain
+select count(1) from ss_src2 tablesample(100B)
+PREHOOK: type: QUERY
+POSTHOOK: query: --HIVE-3401 more split samplings
+
+-- total length
+explain
+select count(1) from ss_src2 tablesample(100B)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 100B))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ ss_src2
+ TableScan
+ alias: ss_src2
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Split Sample:
+ ss_src2
+ seed number: 5
+ total length: 100
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select count(1) from ss_src2 tablesample(100B)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from ss_src2 tablesample(100B)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+500
+PREHOOK: query: explain
+select count(1) from ss_src2 tablesample(1K)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1) from ss_src2 tablesample(1K)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_LENGTH 1K))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ ss_src2
+ TableScan
+ alias: ss_src2
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Split Sample:
+ ss_src2
+ seed number: 5
+ total length: 1024
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select count(1) from ss_src2 tablesample(1K)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from ss_src2 tablesample(1K)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+500
+PREHOOK: query: -- row per split
+explain
+select key, value from ss_src2 tablesample(0 ROWS)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- row per split
+explain
+select key, value from ss_src2 tablesample(0 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 0))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ ss_src2
+ TableScan
+ alias: ss_src2
+ Row Limit Per Split: 0
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select key, value from ss_src2 tablesample(0 ROWS)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from ss_src2 tablesample(0 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select count(1) from ss_src2 tablesample(10 ROWS)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1) from ss_src2 tablesample(10 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 10))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ ss_src2
+ TableScan
+ alias: ss_src2
+ Row Limit Per Split: 10
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from ss_src2 tablesample(10 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+10
+PREHOOK: query: explain
+select count(1) from ss_src2 tablesample(100 ROWS)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1) from ss_src2 tablesample(100 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE TOK_ROWCOUNT 100))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ ss_src2
+ TableScan
+ alias: ss_src2
+ Row Limit Per Split: 100
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from ss_src2 tablesample(100 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+100
+PREHOOK: query: select key from ss_src2 tablesample(200B)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select key from ss_src2 tablesample(200B)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484
+265
+193
+401
+150
+273
+224
+369
+66
+PREHOOK: query: select key from ss_src2 tablesample(10 ROWS)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: query: select key from ss_src2 tablesample(10 ROWS)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss_src2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484