You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2011/08/19 06:18:07 UTC
svn commit: r1159499 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/physical/index/
java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/
test/queries/clientpositive/ test/results/clientpositive/
Author: heyongqiang
Date: Fri Aug 19 04:18:07 2011
New Revision: 1159499
URL: http://svn.apache.org/viewvc?rev=1159499&view=rev
Log:
HIVE-2335: Indexes are still automatically queried when out of sync with their source tables (Syed via He Yongqiang)
Added:
hive/trunk/ql/src/test/queries/clientpositive/index_stale.q
hive/trunk/ql/src/test/queries/clientpositive/index_stale_partitioned.q
hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out
hive/trunk/ql/src/test/results/clientpositive/index_stale_partitioned.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java?rev=1159499&r1=1159498&r2=1159499&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java Fri Aug 19 04:18:07 2011
@@ -31,6 +31,9 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
@@ -68,10 +71,12 @@ public class IndexWhereProcessor impleme
private static final Log LOG = LogFactory.getLog(IndexWhereProcessor.class.getName());
private final Map<Table, List<Index>> indexes;
+ private Map<Index, Table> indexToIndexTable;
public IndexWhereProcessor(Map<Table, List<Index>> indexes) {
super();
this.indexes = indexes;
+ this.indexToIndexTable = new HashMap<Index, Table>();
}
@Override
@@ -243,7 +248,7 @@ public class IndexWhereProcessor impleme
for (Partition part : queryPartitions) {
List<Table> sourceIndexTables = getIndexTables(hive, part);
- if (!containsPartition(hive, sourceIndexTables, part)) {
+ if (!containsPartition(hive, part)) {
return null; // problem if it doesn't contain the partition
}
}
@@ -252,6 +257,23 @@ public class IndexWhereProcessor impleme
}
/**
+ * return index tables associated with a given base table
+ */
+  private List<Table> getIndexTables(Hive hive, Table table) throws
+      HiveException {
+    // Always hand back a list (possibly empty), never null.
+    List<Table> resolved = new ArrayList<Table>();
+    List<Index> tableIndexes = (indexes != null) ? indexes.get(table) : null;
+    if (tableIndexes != null) {
+      for (Index idx : tableIndexes) {
+        Table backing = hive.getTable(idx.getIndexTableName());
+        resolved.add(backing);
+        // Remember the index -> index table association for later lookups.
+        indexToIndexTable.put(idx, backing);
+      }
+    }
+    return resolved;
+  }
+
+ /**
* return index tables associated with the base table of the partition
*/
private List<Table> getIndexTables(Hive hive, Partition part) throws HiveException {
@@ -261,32 +283,99 @@ public class IndexWhereProcessor impleme
return indexTables;
}
for (Index index : indexes.get(partitionedTable)) {
- indexTables.add(hive.getTable(index.getIndexTableName()));
+ Table indexTable = hive.getTable(index.getIndexTableName());
+ indexToIndexTable.put(index, indexTable);
+ indexTables.add(indexTable);
}
return indexTables;
}
/**
- * check that every index table contains the given partition
+ * check that every index table contains the given partition and is fresh
*/
- private boolean containsPartition(Hive hive, List<Table> indexTables, Partition part)
+ private boolean containsPartition(Hive hive, Partition part)
throws HiveException {
HashMap<String, String> partSpec = part.getSpec();
- if (partSpec.isEmpty()) {
- return true; // empty specs come from non-partitioned tables
+ if (indexes == null || indexes.get(part.getTable()) == null) {
+ return false;
}
- if (indexTables == null || indexTables.size() == 0) {
- return false;
+ if (partSpec.isEmpty()) {
+ // empty specs come from non-partitioned tables
+ return isIndexTableFresh(hive, indexes.get(part.getTable()), part.getTable());
}
- for (Table indexTable : indexTables) {
+ for (Index index : indexes.get(part.getTable())) {
+ Table indexTable = indexToIndexTable.get(index);
// get partitions that match the spec
List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);
if (matchingPartitions == null || matchingPartitions.size() == 0) {
LOG.info("Index table " + indexTable + "did not contain built partition that matched " + partSpec);
return false;
+ } else if (!isIndexPartitionFresh(hive, index, part)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+  /**
+   * Check that the index data recorded for one partition of a partitioned
+   * table is fresh.
+   *
+   * The index parameters are looked up under the key
+   * <code>part.getSpec().toString()</code>; the value found there is parsed as
+   * a long and compared with the current modification time of the partition
+   * path. The index counts as stale when no value is recorded, when the value
+   * is not a valid long, or when the partition path was modified after it.
+   *
+   * @param hive supplies the configuration used to resolve the file system
+   * @param index index whose parameters hold the recorded timestamps
+   * @param part base table partition to check
+   * @return true only if a valid timestamp is recorded and the partition path
+   *         has not been modified since
+   * @throws HiveException if the file status of the partition path cannot be read
+   */
+  private boolean isIndexPartitionFresh(Hive hive, Index index,
+      Partition part) throws HiveException {
+    LOG.info("checking index staleness...");
+    try {
+      FileSystem partFs = part.getPartitionPath().getFileSystem(hive.getConf());
+      FileStatus partFss = partFs.getFileStatus(part.getPartitionPath());
+      // An index without any parameters has no recorded timestamp either.
+      Map<String, String> indexParams = index.getParameters();
+      String ts = (indexParams == null) ? null : indexParams.get(part.getSpec().toString());
+      if (ts == null) {
+        return false;
+      }
+      long indexTs;
+      try {
+        indexTs = Long.parseLong(ts);
+      } catch (NumberFormatException e) {
+        // A corrupt property must not abort planning; just skip the index.
+        LOG.warn("ignoring index: unparseable timestamp '" + ts
+            + "' recorded for partition " + part.getSpec());
+        return false;
+      }
+      LOG.info(partFss.getModificationTime());
+      LOG.info(ts);
+      if (partFss.getModificationTime() > indexTs) {
+        LOG.info("index is stale on the partitions that matched " + part.getSpec());
+        return false;
+      }
+    } catch (IOException e) {
+      LOG.info("failed to grab timestamp info");
+      throw new HiveException(e);
+    }
+    return true;
+  }
+
+ /**
+ * Check that the indexes on the unpartitioned table exist and are fresh
+ */
+ private boolean isIndexTableFresh(Hive hive, List<Index> indexes, Table src)
+ throws HiveException {
+ //check that they exist
+ if (indexes == null || indexes.size() == 0) {
+ return false;
+ }
+ //check that they are not stale
+ for (Index index : indexes) {
+ LOG.info("checking index staleness...");
+ try {
+ FileSystem srcFs = src.getPath().getFileSystem(hive.getConf());
+ FileStatus srcFss= srcFs.getFileStatus(src.getPath());
+ String ts = index.getParameters().get("base_timestamp");
+ if (ts == null) {
+ return false;
+ }
+ long indexTs = Long.parseLong(ts);
+ LOG.info(srcFss.getModificationTime());
+ LOG.info(ts);
+ if (srcFss.getModificationTime() > indexTs) {
+ LOG.info("index is stale ");
+ return false;
+ }
+ } catch (IOException e) {
+ LOG.info("failed to grab timestamp info");
+ throw new HiveException(e);
}
}
return true;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1159499&r1=1159498&r2=1159499&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Fri Aug 19 04:18:07 2011
@@ -42,6 +42,8 @@ import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.Warehouse;
@@ -778,6 +780,7 @@ public class DDLSemanticAnalyzer extends
storageFormat.fillDefaultStorageFormat(shared);
+
CreateIndexDesc crtIndexDesc = new CreateIndexDesc(tableName, indexName,
indexedCols, indexTableName, deferredRebuild, storageFormat.inputFormat, storageFormat.outputFormat,
storageFormat.storageHandler, typeName, location, idxProps, tblProps,
@@ -813,12 +816,52 @@ public class DDLSemanticAnalyzer extends
String baseTableName = unescapeIdentifier(ast.getChild(0).getText());
String indexName = unescapeIdentifier(ast.getChild(1).getText());
HashMap<String, String> partSpec = null;
+ Map<Map<String, String>, Long> basePartTs = new HashMap<Map<String, String>, Long>();
+ Map<String, String> props = new HashMap<String, String>();
Tree part = ast.getChild(2);
if (part != null) {
partSpec = extractPartitionSpecs(part);
}
+ AlterIndexDesc alterIdxDesc = new AlterIndexDesc(AlterIndexTypes.ADDPROPS);
+ try {
+ long timestamp;
+ Table baseTbl = db.getTable(db.getCurrentDatabase(), baseTableName);
+ if (baseTbl.isPartitioned()) {
+ List<Partition> baseParts;
+ if (part != null) {
+ baseParts = db.getPartitions(baseTbl, partSpec);
+ } else {
+ baseParts = db.getPartitions(baseTbl);
+ }
+ if (baseParts != null) {
+ for (Partition p : baseParts) {
+ FileSystem fs = p.getPartitionPath().getFileSystem(db.getConf());
+ FileStatus fss = fs.getFileStatus(p.getPartitionPath());
+ basePartTs.put(p.getSpec(), fss.getModificationTime());
+ }
+ }
+ } else {
+ FileSystem fs = baseTbl.getPath().getFileSystem(db.getConf());
+ FileStatus fss = fs.getFileStatus(baseTbl.getPath());
+ basePartTs.put(null, fss.getModificationTime());
+ }
+ for (Map<String, String> spec : basePartTs.keySet()) {
+ if (spec != null) {
+ props.put(spec.toString(), basePartTs.get(spec).toString());
+ } else {
+ props.put("base_timestamp", basePartTs.get(null).toString());
+ }
+ }
+ alterIdxDesc.setProps(props);
+ } catch (Exception e) {
+ }
+ alterIdxDesc.setIndexName(indexName);
+ alterIdxDesc.setBaseTableName(baseTableName);
+ alterIdxDesc.setDbName(db.getCurrentDatabase());
+
List<Task<?>> indexBuilder = getIndexBuilderMapRed(baseTableName, indexName, partSpec);
rootTasks.addAll(indexBuilder);
+ rootTasks.add(TaskFactory.get(new DDLWork(alterIdxDesc), conf));
}
private void analyzeAlterIndexProps(ASTNode ast)
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java?rev=1159499&r1=1159498&r2=1159499&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java Fri Aug 19 04:18:07 2011
@@ -20,7 +20,7 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.Map;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -37,7 +37,7 @@ public class AlterIndexDesc extends DDLD
private String indexName;
private String baseTable;
private String dbName;
- private HashMap<String, String> props;
+ private Map<String, String> props;
/**
* alterIndexTypes.
@@ -121,7 +121,7 @@ public class AlterIndexDesc extends DDLD
* @return the props
*/
@Explain(displayName = "properties")
- public HashMap<String, String> getProps() {
+ public Map<String, String> getProps() {
return props;
}
@@ -129,7 +129,7 @@ public class AlterIndexDesc extends DDLD
* @param props
* the props to set
*/
- public void setProps(HashMap<String, String> props) {
+ public void setProps(Map<String, String> props) {
this.props = props;
}
}
Added: hive/trunk/ql/src/test/queries/clientpositive/index_stale.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_stale.q?rev=1159499&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_stale.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_stale.q Fri Aug 19 04:18:07 2011
@@ -0,0 +1,20 @@
+-- test that stale indexes are not used
+
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE;
+INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50;
+
+-- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX temp_index ON temp REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- overwrite temp table so index is out of date
+INSERT OVERWRITE TABLE temp SELECT * FROM src;
+
+-- should return correct results bypassing index
+EXPLAIN SELECT * FROM temp WHERE key = 86;
+SELECT * FROM temp WHERE key = 86;
+DROP table temp;
Added: hive/trunk/ql/src/test/queries/clientpositive/index_stale_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_stale_partitioned.q?rev=1159499&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_stale_partitioned.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_stale_partitioned.q Fri Aug 19 04:18:07 2011
@@ -0,0 +1,26 @@
+-- Test if index is actually being used.
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE;
+ALTER TABLE temp ADD PARTITION (foo = 'bar');
+INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src WHERE key < 50;
+
+-- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX temp_index ON temp PARTITION (foo = 'bar') REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- overwrite temp table so index is out of date
+INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src;
+
+-- query should not return any values
+SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar';
+EXPLAIN SELECT * FROM temp WHERE key = 86 AND foo = 'bar';
+SELECT * FROM temp WHERE key = 86 AND foo = 'bar';
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=false;
+DROP table temp;
Added: hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out?rev=1159499&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out Fri Aug 19 04:18:07 2011
@@ -0,0 +1,141 @@
+PREHOOK: query: -- test that stale indexes are not used
+
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- test that stale indexes are not used
+
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@temp
+PREHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@temp
+POSTHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX temp_index ON temp REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@default__temp_temp_index__
+POSTHOOK: query: ALTER INDEX temp_index ON temp REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@default__temp_temp_index__
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- overwrite temp table so index is out of date
+INSERT OVERWRITE TABLE temp SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@temp
+POSTHOOK: query: -- overwrite temp table so index is out of date
+INSERT OVERWRITE TABLE temp SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- should return correct results bypassing index
+EXPLAIN SELECT * FROM temp WHERE key = 86
+PREHOOK: type: QUERY
+POSTHOOK: query: -- should return correct results bypassing index
+EXPLAIN SELECT * FROM temp WHERE key = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ temp
+ TableScan
+ alias: temp
+ filterExpr:
+ expr: (key = 86)
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (key = 86)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM temp WHERE key = 86
+PREHOOK: type: QUERY
+PREHOOK: Input: default@temp
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-05-26_984_9114308663658944615/-mr-10000
+POSTHOOK: query: SELECT * FROM temp WHERE key = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-05-26_984_9114308663658944615/-mr-10000
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+86 val_86
+PREHOOK: query: DROP table temp
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@temp
+POSTHOOK: query: DROP table temp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
Added: hive/trunk/ql/src/test/results/clientpositive/index_stale_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_stale_partitioned.q.out?rev=1159499&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_stale_partitioned.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_stale_partitioned.q.out Fri Aug 19 04:18:07 2011
@@ -0,0 +1,167 @@
+PREHOOK: query: -- Test if index is actually being used.
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Test if index is actually being used.
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) PARTITIONED BY (foo string) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@temp
+PREHOOK: query: ALTER TABLE temp ADD PARTITION (foo = 'bar')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@temp
+POSTHOOK: query: ALTER TABLE temp ADD PARTITION (foo = 'bar')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@temp@foo=bar
+PREHOOK: query: INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src WHERE key < 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@temp@foo=bar
+POSTHOOK: query: INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src WHERE key < 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@temp@foo=bar
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX temp_index ON temp PARTITION (foo = 'bar') REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@temp@foo=bar
+PREHOOK: Output: default@default__temp_temp_index__@foo=bar
+POSTHOOK: query: ALTER INDEX temp_index ON temp PARTITION (foo = 'bar') REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@temp@foo=bar
+POSTHOOK: Output: default@default__temp_temp_index__@foo=bar
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- overwrite temp table so index is out of date
+INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@temp@foo=bar
+POSTHOOK: query: -- overwrite temp table so index is out of date
+INSERT OVERWRITE TABLE temp PARTITION (foo = 'bar') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@temp@foo=bar
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- query should not return any values
+SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__temp_temp_index__@foo=bar
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-05-57_509_3668444901697550522/-mr-10000
+POSTHOOK: query: -- query should not return any values
+SELECT * FROM default__temp_temp_index__ WHERE key = 86 AND foo='bar'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__temp_temp_index__@foo=bar
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-05-57_509_3668444901697550522/-mr-10000
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 AND foo = 'bar'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 AND foo = 'bar'
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (AND (= (TOK_TABLE_OR_COL key) 86) (= (TOK_TABLE_OR_COL foo) 'bar')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ temp
+ TableScan
+ alias: temp
+ filterExpr:
+ expr: ((key = 86) and (foo = 'bar'))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (key = 86)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ expr: foo
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM temp WHERE key = 86 AND foo = 'bar'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@temp@foo=bar
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-06-03_020_8948524785223568173/-mr-10000
+POSTHOOK: query: SELECT * FROM temp WHERE key = 86 AND foo = 'bar'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@temp@foo=bar
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-06-03_020_8948524785223568173/-mr-10000
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+86 val_86 bar
+PREHOOK: query: DROP table temp
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@temp
+POSTHOOK: query: DROP table temp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar)._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__ PARTITION(foo=bar).key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp PARTITION(foo=bar).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]