Posted to commits@hive.apache.org by he...@apache.org on 2011/08/24 06:18:44 UTC
svn commit: r1160975 - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/java/org/apache/hadoop/hive/ql/plan/
ql/src/test/queries/clientpositive/...
Author: heyongqiang
Date: Wed Aug 24 04:18:43 2011
New Revision: 1160975
URL: http://svn.apache.org/viewvc?rev=1160975&view=rev
Log:
HIVE-2354: Support automatic rebuilding of indexes when they go stale (Syed via He Yongqiang)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java
hive/trunk/ql/src/test/queries/clientpositive/index_auto_update.q
hive/trunk/ql/src/test/results/clientpositive/index_auto_update.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java
hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Aug 24 04:18:43 2011
@@ -370,6 +370,7 @@ public class HiveConf extends Configurat
// Optimizer
HIVEOPTCP("hive.optimize.cp", true), // column pruner
HIVEOPTINDEXFILTER("hive.optimize.index.filter", false), // automatically use indexes
+ HIVEINDEXAUTOUPDATE("hive.optimize.index.autoupdate", false), // automatically update stale indexes
HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown
HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true),
// push predicates down to storage handlers
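
The new flag defaults to false. A minimal usage sketch (not part of this patch) of flipping it programmatically and reading it back through the static accessor that the analyzer changes below rely on:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class AutoUpdateFlagSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Off by default; a Hive session would normally run
        // SET hive.optimize.index.autoupdate=true; instead.
        conf.setBoolVar(HiveConf.ConfVars.HIVEINDEXAUTOUPDATE, true);
        boolean enabled =
            HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE);
        System.out.println("hive.optimize.index.autoupdate = " + enabled);
      }
    }
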
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Wed Aug 24 04:18:43 2011
@@ -37,6 +37,7 @@ import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.HashMap;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
@@ -828,11 +829,52 @@ public class DDLTask extends Task<DDLWor
String indexName = alterIndex.getIndexName();
Index idx = db.getIndex(dbName, baseTableName, indexName);
- if (alterIndex.getOp() == AlterIndexDesc.AlterIndexTypes.ADDPROPS) {
- idx.getParameters().putAll(alterIndex.getProps());
- } else {
- console.printError("Unsupported Alter commnad");
- return 1;
+ switch(alterIndex.getOp()) {
+ case ADDPROPS:
+ idx.getParameters().putAll(alterIndex.getProps());
+ break;
+ case UPDATETIMESTAMP:
+ try {
+ Map<String, String> props = new HashMap<String, String>();
+ Map<Map<String, String>, Long> basePartTs = new HashMap<Map<String, String>, Long>();
+ Table baseTbl = db.getTable(db.getCurrentDatabase(), baseTableName);
+ if (baseTbl.isPartitioned()) {
+ List<Partition> baseParts;
+ if (alterIndex.getSpec() != null) {
+ baseParts = db.getPartitions(baseTbl, alterIndex.getSpec());
+ } else {
+ baseParts = db.getPartitions(baseTbl);
+ }
+ if (baseParts != null) {
+ for (Partition p : baseParts) {
+ FileSystem fs = p.getPartitionPath().getFileSystem(db.getConf());
+ FileStatus fss = fs.getFileStatus(p.getPartitionPath());
+ basePartTs.put(p.getSpec(), fss.getModificationTime());
+ }
+ }
+ } else {
+ FileSystem fs = baseTbl.getPath().getFileSystem(db.getConf());
+ FileStatus fss = fs.getFileStatus(baseTbl.getPath());
+ basePartTs.put(null, fss.getModificationTime());
+ }
+ for (Map<String, String> spec : basePartTs.keySet()) {
+ if (spec != null) {
+ props.put(spec.toString(), basePartTs.get(spec).toString());
+ } else {
+ props.put("base_timestamp", basePartTs.get(null).toString());
+ }
+ }
+ idx.getParameters().putAll(props);
+ } catch (HiveException e) {
+ throw new HiveException("ERROR: Failed to update index timestamps");
+ } catch (IOException e) {
+ throw new HiveException("ERROR: Failed to look up timestamps on filesystem");
+ }
+
+ break;
+ default:
+ console.printError("Unsupported Alter command");
+ return 1;
}
// set last modified by properties
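
The UPDATETIMESTAMP branch above records one entry per base partition in the index's parameters: the key is the partition spec map's toString() (or the literal "base_timestamp" for an unpartitioned table) and the value is the directory's HDFS modification time. A sketch of the resulting layout, assuming a ds partition column and illustrative timestamps:

    import java.util.HashMap;
    import java.util.Map;

    public class IndexTimestampLayoutSketch {
      public static void main(String[] args) {
        Map<String, String> props = new HashMap<String, String>();
        // Partitioned base table: one entry per touched partition,
        // keyed by the spec map's toString().
        props.put("{ds=2011-08-24}", "1314159523000");
        // Unpartitioned base table: a single well-known key.
        props.put("base_timestamp", "1314159523000");
        System.out.println(props); // what idx.getParameters() ends up holding
      }
    }
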
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Aug 24 04:18:43 2011
@@ -816,52 +816,24 @@ public class DDLSemanticAnalyzer extends
String baseTableName = unescapeIdentifier(ast.getChild(0).getText());
String indexName = unescapeIdentifier(ast.getChild(1).getText());
HashMap<String, String> partSpec = null;
- Map<Map<String, String>, Long> basePartTs = new HashMap<Map<String, String>, Long>();
- Map<String, String> props = new HashMap<String, String>();
Tree part = ast.getChild(2);
if (part != null) {
partSpec = extractPartitionSpecs(part);
}
- AlterIndexDesc alterIdxDesc = new AlterIndexDesc(AlterIndexTypes.ADDPROPS);
- try {
- long timestamp;
- Table baseTbl = db.getTable(db.getCurrentDatabase(), baseTableName);
- if (baseTbl.isPartitioned()) {
- List<Partition> baseParts;
- if (part != null) {
- baseParts = db.getPartitions(baseTbl, partSpec);
- } else {
- baseParts = db.getPartitions(baseTbl);
- }
- if (baseParts != null) {
- for (Partition p : baseParts) {
- FileSystem fs = p.getPartitionPath().getFileSystem(db.getConf());
- FileStatus fss = fs.getFileStatus(p.getPartitionPath());
- basePartTs.put(p.getSpec(), fss.getModificationTime());
- }
- }
- } else {
- FileSystem fs = baseTbl.getPath().getFileSystem(db.getConf());
- FileStatus fss = fs.getFileStatus(baseTbl.getPath());
- basePartTs.put(null, fss.getModificationTime());
- }
- for (Map<String, String> spec : basePartTs.keySet()) {
- if (spec != null) {
- props.put(spec.toString(), basePartTs.get(spec).toString());
- } else {
- props.put("base_timestamp", basePartTs.get(null).toString());
- }
- }
- alterIdxDesc.setProps(props);
- } catch (Exception e) {
- }
+ List<Task<?>> indexBuilder = getIndexBuilderMapRed(baseTableName, indexName, partSpec);
+ rootTasks.addAll(indexBuilder);
+
+ // Handle updating index timestamps
+ AlterIndexDesc alterIdxDesc = new AlterIndexDesc(AlterIndexTypes.UPDATETIMESTAMP);
alterIdxDesc.setIndexName(indexName);
alterIdxDesc.setBaseTableName(baseTableName);
alterIdxDesc.setDbName(db.getCurrentDatabase());
+ alterIdxDesc.setSpec(partSpec);
- List<Task<?>> indexBuilder = getIndexBuilderMapRed(baseTableName, indexName, partSpec);
- rootTasks.addAll(indexBuilder);
- rootTasks.add(TaskFactory.get(new DDLWork(alterIdxDesc), conf));
+ Task<?> tsTask = TaskFactory.get(new DDLWork(alterIdxDesc), conf);
+ for (Task<?> t : indexBuilder) {
+ t.addDependentTask(tsTask);
+ }
}
private void analyzeAlterIndexProps(ASTNode ast)
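
The rewrite above makes the rebuild tasks the roots and hangs the UPDATETIMESTAMP task off every one of them, so timestamps are only stamped after all rebuilds finish. A toy model (deliberately not Hive's Task API) of that all-parents-must-finish rule:

    import java.util.ArrayList;
    import java.util.List;

    class ToyTask {
      final String name;
      final List<ToyTask> parents = new ArrayList<ToyTask>();
      ToyTask(String name) { this.name = name; }
      void addDependentTask(ToyTask child) { child.parents.add(this); }
      boolean runnable(List<ToyTask> finished) { return finished.containsAll(parents); }
    }

    public class DependencySketch {
      public static void main(String[] args) {
        ToyTask rebuildA = new ToyTask("rebuild-part-A");
        ToyTask rebuildB = new ToyTask("rebuild-part-B");
        ToyTask tsTask = new ToyTask("update-timestamps");
        rebuildA.addDependentTask(tsTask);
        rebuildB.addDependentTask(tsTask);
        List<ToyTask> finished = new ArrayList<ToyTask>();
        finished.add(rebuildA);
        System.out.println(tsTask.runnable(finished)); // false: rebuild-part-B pending
        finished.add(rebuildB);
        System.out.println(tsTask.runnable(finished)); // true: safe to stamp timestamps
      }
    }
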
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java?rev=1160975&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java Wed Aug 24 04:18:43 2011
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.hive.metastore.api.Index;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
+import org.apache.hadoop.hive.ql.index.HiveIndexQueryContext;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+
+public class IndexUpdater {
+ private List<LoadTableDesc> loadTableWork;
+ private HiveConf conf;
+ private Hive hive;
+ private List<Task<? extends Serializable>> tasks;
+ private Set<ReadEntity> inputs;
+
+
+ public IndexUpdater(List<LoadTableDesc> loadTableWork, Set<ReadEntity> inputs, Configuration conf) {
+ this.loadTableWork = loadTableWork;
+ this.inputs = inputs;
+ this.conf = new HiveConf(conf, IndexUpdater.class);
+ this.tasks = new LinkedList<Task<? extends Serializable>>();
+ }
+
+ public IndexUpdater(LoadTableDesc loadTableWork, Set<ReadEntity> inputs,
+ Configuration conf) {
+ this.loadTableWork = new LinkedList<LoadTableDesc>();
+ this.loadTableWork.add(loadTableWork);
+ this.conf = new HiveConf(conf, IndexUpdater.class);
+ this.tasks = new LinkedList<Task<? extends Serializable>>();
+ this.inputs = inputs;
+ }
+
+ public List<Task<? extends Serializable>> generateUpdateTasks() throws
+ HiveException {
+ hive = Hive.get(this.conf);
+ for (LoadTableDesc ltd : loadTableWork) {
+ TableDesc td = ltd.getTable();
+ Table srcTable = hive.getTable(td.getTableName());
+ List<Index> tblIndexes = srcTable.getAllIndexes((short)-1);
+ Map<String, String> partSpec = ltd.getPartitionSpec();
+ if (partSpec == null || partSpec.size() == 0) {
+ // unpartitioned table, update whole index
+ doIndexUpdate(tblIndexes);
+ } else {
+ doIndexUpdate(tblIndexes, partSpec);
+ }
+ }
+ return tasks;
+ }
+
+ private void doIndexUpdate(List<Index> tblIndexes) throws HiveException {
+ Driver driver = new Driver(this.conf);
+ for (Index idx : tblIndexes) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("ALTER INDEX ");
+ sb.append(idx.getIndexName());
+ sb.append(" ON ");
+ sb.append(idx.getOrigTableName());
+ sb.append(" REBUILD");
+ driver.compile(sb.toString());
+ tasks.addAll(driver.getPlan().getRootTasks());
+ inputs.addAll(driver.getPlan().getInputs());
+ }
+ }
+
+ private void doIndexUpdate(List<Index> tblIndexes, Map<String, String>
+ partSpec) throws HiveException {
+ for (Index index : tblIndexes) {
+ if (containsPartition(index, partSpec)) {
+ doIndexUpdate(index, partSpec);
+ }
+ }
+ }
+
+ private void doIndexUpdate(Index index, Map<String, String> partSpec) throws
+ HiveException {
+ StringBuilder ps = new StringBuilder();
+ boolean first = true;
+ ps.append("(");
+ for (String key : partSpec.keySet()) {
+ if (!first) {
+ ps.append(", ");
+ } else {
+ first = false;
+ }
+ ps.append(key);
+ ps.append("=");
+ ps.append(partSpec.get(key));
+ }
+ ps.append(")");
+ StringBuilder sb = new StringBuilder();
+ sb.append("ALTER INDEX ");
+ sb.append(index.getIndexName());
+ sb.append(" ON ");
+ sb.append(index.getOrigTableName());
+ sb.append(" PARTITION ");
+ sb.append(ps.toString());
+ sb.append(" REBUILD");
+ Driver driver = new Driver(this.conf);
+ driver.compile(sb.toString(), false);
+ tasks.addAll(driver.getPlan().getRootTasks());
+ inputs.addAll(driver.getPlan().getInputs());
+ }
+
+
+ private boolean containsPartition(Index index, Map<String, String> partSpec)
+ throws HiveException {
+ Table indexTable = hive.getTable(index.getIndexTableName());
+ List<Partition> parts = hive.getPartitions(indexTable, partSpec);
+ return (parts == null || parts.size() == 0);
+ }
+}
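
For a load into a single partition, doIndexUpdate assembles a statement such as ALTER INDEX temp_index ON temp PARTITION (ds=2011-08-24) REBUILD and compiles it with a fresh Driver. A standalone sketch of the string assembly (index and table names hypothetical; note the code appends spec values verbatim, without quoting):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class RebuildStatementSketch {
      public static void main(String[] args) {
        Map<String, String> partSpec = new LinkedHashMap<String, String>();
        partSpec.put("ds", "2011-08-24"); // hypothetical partition spec
        StringBuilder ps = new StringBuilder("(");
        boolean first = true;
        for (Map.Entry<String, String> e : partSpec.entrySet()) {
          if (!first) { ps.append(", "); }
          first = false;
          ps.append(e.getKey()).append("=").append(e.getValue());
        }
        ps.append(")");
        String sql = "ALTER INDEX temp_index ON temp PARTITION "
            + ps + " REBUILD";
        System.out.println(sql);
        // ALTER INDEX temp_index ON temp PARTITION (ds=2011-08-24) REBUILD
      }
    }
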
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java Wed Aug 24 04:18:43 2011
@@ -271,5 +271,21 @@ public class LoadSemanticAnalyzer extend
}
rootTasks.add(rTask);
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
+ IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
+ try {
+ List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
+ for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
+ // LOAD DATA compiles to either a copy plus a move or just a move; the update must always depend on the move
+ if (rTask.getChildren() == null || rTask.getChildren().size() == 0) {
+ rTask.addDependentTask(updateTask);
+ } else {
+ ((Task<? extends Serializable>)rTask.getChildren().get(0)).addDependentTask(updateTask);
+ }
+ }
+ } catch (HiveException e) {
+ console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in sync");
+ }
+ }
}
}
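
The branch above exists because a local-path LOAD compiles to a copy task with a move task as its child, while a DFS-path LOAD is a single move; the index update must follow whichever task actually moves data into the table. A toy sketch of picking the attachment point (plan shapes illustrative):

    import java.util.Arrays;
    import java.util.List;

    public class LoadShapeSketch {
      public static void main(String[] args) {
        List<String> localLoad = Arrays.asList("copy", "move"); // LOAD DATA LOCAL
        List<String> dfsLoad = Arrays.asList("move");           // LOAD DATA
        for (List<String> plan : Arrays.asList(localLoad, dfsLoad)) {
          // Mirrors the check on rTask.getChildren(): attach to the child
          // move if one exists, otherwise to the root task itself.
          String anchor = plan.get(plan.size() - 1);
          System.out.println(plan + " -> index update depends on: " + anchor);
        }
      }
    }
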
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Aug 24 04:18:43 2011
@@ -6857,8 +6857,21 @@ public class SemanticAnalyzer extends Ba
Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
conf);
mvTask.add(tsk);
+ // Check whether this insert makes any indexes stale and, if enabled, auto-update them
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
+ IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, getInputs(), conf);
+ try {
+ List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
+ for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
+ tsk.addDependentTask(updateTask);
+ }
+ } catch (HiveException e) {
+ console.printInfo("WARNING: could not auto-update stale indexes, indexes are not in sync");
+ }
+ }
}
+
boolean oneLoadFile = true;
for (LoadFileDesc lfd : loadFileWork) {
if (qb.isCTAS()) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterIndexDesc.java Wed Aug 24 04:18:43 2011
@@ -37,6 +37,7 @@ public class AlterIndexDesc extends DDLD
private String indexName;
private String baseTable;
private String dbName;
+ private Map<String, String> partSpec; // partition specification of partitions touched
private Map<String, String> props;
/**
@@ -44,6 +45,7 @@ public class AlterIndexDesc extends DDLD
*
*/
public static enum AlterIndexTypes {
+ UPDATETIMESTAMP,
ADDPROPS};
AlterIndexTypes op;
@@ -88,6 +90,21 @@ public class AlterIndexDesc extends DDLD
}
/**
+ * @return the partition spec
+ */
+ public Map<String, String> getSpec() {
+ return partSpec;
+ }
+
+ /**
+ * @param partSpec
+ * the partition spec to set
+ */
+ public void setSpec(Map<String, String> partSpec) {
+ this.partSpec = partSpec;
+ }
+
+ /**
* @return the name of the database that the base table is in
*/
public String getDbName() {
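
Putting the plan pieces together, DDLSemanticAnalyzer now builds the descriptor roughly as follows; a sketch using the new setters (table and index names borrowed from the test below, the partition spec is hypothetical):

    import java.util.HashMap;
    import org.apache.hadoop.hive.ql.plan.AlterIndexDesc;
    import org.apache.hadoop.hive.ql.plan.AlterIndexDesc.AlterIndexTypes;

    public class AlterIndexDescSketch {
      public static void main(String[] args) {
        HashMap<String, String> partSpec = new HashMap<String, String>();
        partSpec.put("ds", "2011-08-24"); // hypothetical partition
        AlterIndexDesc desc = new AlterIndexDesc(AlterIndexTypes.UPDATETIMESTAMP);
        desc.setIndexName("temp_index");
        desc.setBaseTableName("temp");
        desc.setDbName("default");
        desc.setSpec(partSpec); // a null spec means DDLTask stamps all partitions
      }
    }
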
Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_update.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_update.q?rev=1160975&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_update.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_update.q Wed Aug 24 04:18:43 2011
@@ -0,0 +1,26 @@
+-- Test if index is actually being used.
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE;
+INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50;
+
+-- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX temp_index ON temp REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.autoupdate=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- overwrite temp table so index is out of date
+EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src;
+INSERT OVERWRITE TABLE temp SELECT * FROM src;
+
+-- query should return indexed values
+EXPLAIN SELECT * FROM temp WHERE key = 86;
+SELECT * FROM temp WHERE key = 86;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=false;
+DROP table temp;
Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_update.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_update.q.out?rev=1160975&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_update.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_update.q.out Wed Aug 24 04:18:43 2011
@@ -0,0 +1,382 @@
+PREHOOK: query: -- Test if index is actually being used.
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Test if index is actually being used.
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@temp
+PREHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@temp
+POSTHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src WHERE key < 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX temp_index ON temp REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@default__temp_temp_index__
+POSTHOOK: query: ALTER INDEX temp_index ON temp REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@default__temp_temp_index__
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- overwrite temp table so index is out of date
+EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src
+PREHOOK: type: QUERY
+POSTHOOK: query: -- overwrite temp table so index is out of date
+EXPLAIN INSERT OVERWRITE TABLE temp SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME temp))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-8 depends on stages: Stage-4 , consists of Stage-7, Stage-6
+ Stage-7
+ Stage-0 depends on stages: Stage-7, Stage-6
+ Stage-1 depends on stages: Stage-0
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+ null depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-0
+ Stage-6
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: pfile:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-08-19_12-59-47_331_8135948838308755075/-ext-10000
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ temp
+ TableScan
+ alias: temp
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: INPUT__FILE__NAME
+ type: string
+ expr: BLOCK__OFFSET__INSIDE__FILE
+ type: bigint
+ outputColumnNames: key, INPUT__FILE__NAME, BLOCK__OFFSET__INSIDE__FILE
+ Group By Operator
+ aggregations:
+ expr: collect_set(BLOCK__OFFSET__INSIDE__FILE)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: INPUT__FILE__NAME
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: array<bigint>
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: collect_set(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: array<bigint>
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.default__temp_temp_index__
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.default__temp_temp_index__
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+ Stage: null
+
+ Stage: Stage-3
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+ pfile:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-08-19_12-59-47_331_8135948838308755075/-ext-10002
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@temp
+POSTHOOK: query: INSERT OVERWRITE TABLE temp SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- query should return indexed values
+EXPLAIN SELECT * FROM temp WHERE key = 86
+PREHOOK: type: QUERY
+POSTHOOK: query: -- query should return indexed values
+EXPLAIN SELECT * FROM temp WHERE key = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86))))
+
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-6 depends on stages: Stage-3 , consists of Stage-5, Stage-4
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4
+ Stage-1 depends on stages: Stage-2
+ Stage-4
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ default__temp_temp_index__
+ TableScan
+ alias: default__temp_temp_index__
+ filterExpr:
+ expr: (key = 86)
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (key = 86)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offsets
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-08-19_13-00-03_428_2545956864627892502/-ext-10000
+
+ Stage: Stage-2
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-19_13-00-03_256_7857629799023395566/-mr-10002
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ temp
+ TableScan
+ alias: temp
+ filterExpr:
+ expr: (key = 86)
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (key = 86)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-08-19_13-00-03_428_2545956864627892502/-ext-10001
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM temp WHERE key = 86
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__temp_temp_index__
+PREHOOK: Input: default@temp
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-19_13-00-03_545_519763470928530010/-mr-10000
+POSTHOOK: query: SELECT * FROM temp WHERE key = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__temp_temp_index__
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-19_13-00-03_545_519763470928530010/-mr-10000
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+86 val_86
+PREHOOK: query: DROP table temp
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@temp
+POSTHOOK: query: DROP table temp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: temp.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: temp.val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
Modified: hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out?rev=1160975&r1=1160974&r2=1160975&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/index_stale.q.out Wed Aug 24 04:18:43 2011
@@ -111,11 +111,11 @@ STAGE PLANS:
PREHOOK: query: SELECT * FROM temp WHERE key = 86
PREHOOK: type: QUERY
PREHOOK: Input: default@temp
-PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-05-26_984_9114308663658944615/-mr-10000
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-19_11-59-43_705_6075260497976129969/-mr-10000
POSTHOOK: query: SELECT * FROM temp WHERE key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@temp
-POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-18_03-05-26_984_9114308663658944615/-mr-10000
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-19_11-59-43_705_6075260497976129969/-mr-10000
POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]