Posted to commits@hive.apache.org by gu...@apache.org on 2014/01/28 06:48:10 UTC
svn commit: r1561947 [10/17] - in /hive/branches/tez: ./ ant/
ant/src/org/apache/hadoop/hive/ant/ beeline/ cli/
cli/src/java/org/apache/hadoop/hive/cli/ common/ common/src/
common/src/java/org/apache/hadoop/hive/common/type/
common/src/java/org/apache/...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java Tue Jan 28 05:48:03 2014
@@ -55,13 +55,13 @@ public class HashTableLoader implements
MapJoinTableContainer[] mapJoinTables,
MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
- String baseDir = null;
+ Path baseDir = null;
Path currentInputPath = context.getCurrentInputPath();
LOG.info("******* Load from HashTable File: input : " + currentInputPath);
String fileName = context.getLocalWork().getBucketFileName(currentInputPath.toString());
try {
if (ShimLoader.getHadoopShims().isLocalMode(hconf)) {
- baseDir = context.getLocalWork().getTmpFileURI();
+ baseDir = context.getLocalWork().getTmpPath();
} else {
Path[] localArchives;
String stageID = context.getLocalWork().getStageID();
@@ -74,8 +74,7 @@ public class HashTableLoader implements
if (!archive.getName().endsWith(suffix)) {
continue;
}
- Path archiveLocalLink = archive.makeQualified(localFs);
- baseDir = archiveLocalLink.toUri().getPath();
+ baseDir = archive.makeQualified(localFs);
}
}
for (int pos = 0; pos < mapJoinTables.length; pos++) {
@@ -85,8 +84,7 @@ public class HashTableLoader implements
if(baseDir == null) {
throw new IllegalStateException("baseDir cannot be null");
}
- String filePath = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte)pos, fileName);
- Path path = new Path(filePath);
+ Path path = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte)pos, fileName);
LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path);
ObjectInputStream in = new ObjectInputStream(new BufferedInputStream(
new FileInputStream(path.toUri().getPath()), 4096));
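For illustration, a minimal sketch of the Path-returning Utilities.generatePath overload the hunk above now calls. The name layout below is an assumption based on the old String-based call, not the actual Utilities implementation:

    import org.apache.hadoop.fs.Path;

    // Hypothetical sketch: compose the hashtable dump-file path directly as a
    // Path instead of a String. The "MapJoin-" prefix and separators are assumptions.
    public static Path generatePath(Path baseDir, String dumpFilePrefix, byte tag, String fileName) {
      return new Path(baseDir, "MapJoin-" + dumpFilePrefix + tag + "-" + fileName);
    }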
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapRedTask.java Tue Jan 28 05:48:03 2014
@@ -169,7 +169,7 @@ public class MapRedTask extends ExecDriv
String hiveConfArgs = generateCmdLine(conf, ctx);
// write out the plan to a local file
- Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml");
+ Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
OutputStream out = FileSystem.getLocal(conf).create(planPath);
MapredWork plan = getWork();
LOG.info("Generating plan file " + planPath.toString());
@@ -188,7 +188,7 @@ public class MapRedTask extends ExecDriv
if (!files.isEmpty()) {
cmdLine = cmdLine + " -files " + files;
- workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath();
+ workDir = ctx.getLocalTmpPath().toUri().getPath();
if (! (new File(workDir)).mkdir()) {
throw new IOException ("Cannot create tmp working dir: " + workDir);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java Tue Jan 28 05:48:03 2014
@@ -136,7 +136,7 @@ public class MapredLocalTask extends Tas
String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
// write out the plan to a local file
- Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml");
+ Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
OutputStream out = FileSystem.getLocal(conf).create(planPath);
MapredLocalWork plan = getWork();
LOG.info("Generating plan file " + planPath.toString());
@@ -157,7 +157,7 @@ public class MapredLocalTask extends Tas
if (!files.isEmpty()) {
cmdLine = cmdLine + " -files " + files;
- workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath();
+ workDir = ctx.getLocalTmpPath().toUri().getPath();
if (!(new File(workDir)).mkdir()) {
throw new IOException("Cannot create tmp working dir: " + workDir);
@@ -427,15 +427,14 @@ public class MapredLocalTask extends Tas
byte tag = (byte) childOp.getParentOperators().indexOf(parentOp);
// generate empty hashtable for this (byte)tag
- String tmpURI = this.getWork().getTmpFileURI();
+ Path tmpPath = this.getWork().getTmpPath();
String fileName = work.getBucketFileName(bigBucketFileName);
HashTableSinkOperator htso = (HashTableSinkOperator)childOp;
- String tmpURIPath = Utilities.generatePath(tmpURI, htso.getConf().getDumpFilePrefix(),
+ Path path = Utilities.generatePath(tmpPath, htso.getConf().getDumpFilePrefix(),
tag, fileName);
- console.printInfo(Utilities.now() + "\tDump the hashtable into file: " + tmpURIPath);
- Path path = new Path(tmpURIPath);
+ console.printInfo(Utilities.now() + "\tDump the hashtable into file: " + path);
FileSystem fs = path.getFileSystem(job);
ObjectOutputStream out = new ObjectOutputStream(fs.create(path));
try {
@@ -443,7 +442,7 @@ public class MapredLocalTask extends Tas
} finally {
out.close();
}
- console.printInfo(Utilities.now() + "\tUpload 1 File to: " + tmpURIPath + " File size: "
+ console.printInfo(Utilities.now() + "\tUpload 1 File to: " + path + " File size: "
+ fs.getFileStatus(path).getLen());
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java Tue Jan 28 05:48:03 2014
@@ -207,13 +207,7 @@ public class DagUtils {
Path tezDir = getTezDir(mrScratchDir);
// set up the operator plan
- Path planPath = Utilities.setMapWork(conf, mapWork,
- mrScratchDir.toUri().toString(), false);
-
- // setup input paths and split info
- List<Path> inputPaths = Utilities.getInputPaths(conf, mapWork,
- mrScratchDir.toUri().toString(), ctx);
- Utilities.setInputPaths(conf, inputPaths);
+ Utilities.setMapWork(conf, mapWork, mrScratchDir, false);
// create the directories FileSinkOperators need
Utilities.createTmpDirs(conf, mapWork);
@@ -317,8 +311,7 @@ public class DagUtils {
Path mrScratchDir, Context ctx) throws Exception {
// set up operator plan
- Path planPath = Utilities.setReduceWork(conf, reduceWork,
- mrScratchDir.toUri().toString(), false);
+ Utilities.setReduceWork(conf, reduceWork, mrScratchDir, false);
// create the directories FileSinkOperators need
Utilities.createTmpDirs(conf, reduceWork);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java Tue Jan 28 05:48:03 2014
@@ -114,7 +114,8 @@ public class TezTask extends Task<TezWor
// if we don't have one yet create it.
if (session == null) {
- ss.setTezSession(new TezSessionState());
+ session = new TezSessionState();
+ ss.setTezSession(session);
}
// if it's not running start it.
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Tue Jan 28 05:48:03 2014
@@ -18,13 +18,14 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.common.type.Decimal128;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
-
/**
* Utility functions to handle null propagation.
*/
@@ -317,4 +318,57 @@ public class NullUtil {
}
}
}
+
+ // Initialize any null-flag entries that could be referenced in an output vector to false.
+ public static void initOutputNullsToFalse(ColumnVector v, boolean isRepeating, boolean selectedInUse,
+ int[] sel, int n) {
+ if (v.isRepeating) {
+ v.isNull[0] = false;
+ return;
+ }
+
+ if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ v.isNull[i] = false;
+ }
+ } else {
+ Arrays.fill(v.isNull, 0, n, false);
+ }
+ }
+
+ /**
+ * Filter out rows with null values. Returns the number of rows remaining in the batch.
+ */
+ public static int filterNulls(ColumnVector v, boolean selectedInUse, int[] sel, int n) {
+ int newSize = 0;
+
+ if (v.noNulls) {
+
+ // no rows will be filtered
+ return n;
+ }
+
+ if (v.isRepeating) {
+
+ // all rows are filtered if repeating null, otherwise no rows are filtered
+ return v.isNull[0] ? 0 : n;
+ }
+
+ if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (!v.isNull[i]) {
+ sel[newSize++] = i;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if (!v.isNull[i]) {
+ sel[newSize++] = i;
+ }
+ }
+ }
+ return newSize;
+ }
}
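For illustration, a usage sketch of the filterNulls helper added above; the 5-row batch is invented for the example. The helper compacts the selection vector in place and returns the surviving row count:

    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    LongColumnVector col = new LongColumnVector(5);
    col.noNulls = false;        // vector may contain nulls
    col.isNull[1] = true;
    col.isNull[3] = true;
    int[] sel = new int[5];     // selection vector, not yet in use
    int newSize = NullUtil.filterNulls(col, false, sel, 5);
    // newSize == 3; sel now holds {0, 2, 4}, the indices of the non-null rows.
    // A caller would typically set batch.size = newSize and selectedInUse = true.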
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/hooks/Entity.java Tue Jan 28 05:48:03 2014
@@ -23,8 +23,8 @@ import java.net.URI;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
/**
@@ -236,11 +236,11 @@ public class Entity implements Serializa
}
if (typ == Type.TABLE) {
- return t.getDataLocation();
+ return t.getDataLocation().toUri();
}
if (typ == Type.PARTITION) {
- return p.getDataLocation();
+ return p.getDataLocation().toUri();
}
if (typ == Type.DFS_DIR || typ == Type.LOCAL_DIR) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexMetadataChangeTask.java Tue Jan 28 05:48:03 2014
@@ -23,7 +23,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -67,9 +66,9 @@ public class IndexMetadataChangeTask ext
return 1;
}
- Path url = new Path(part.getPartitionPath().toString());
- FileSystem fs = url.getFileSystem(conf);
- FileStatus fstat = fs.getFileStatus(url);
+ Path path = part.getDataLocation();
+ FileSystem fs = path.getFileSystem(conf);
+ FileStatus fstat = fs.getFileStatus(path);
part.getParameters().put(HiveIndex.INDEX_TABLE_CREATETIME, Long.toString(fstat.getModificationTime()));
db.alterPartition(tbl.getTableName(), part);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java Tue Jan 28 05:48:03 2014
@@ -26,12 +26,7 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -44,13 +39,12 @@ import org.apache.hadoop.hive.ql.parse.S
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
/**
* IndexPredicateAnalyzer decomposes predicates, separating the parts
@@ -59,15 +53,14 @@ import org.apache.hadoop.util.Reflection
* comparing a column reference with a constant value. It is assumed
* that all column aliases encountered refer to the same table.
*/
-public class IndexPredicateAnalyzer
-{
- private static final Log LOG = LogFactory.getLog(IndexPredicateAnalyzer.class.getName());
- private final Set<String> udfNames;
+public class IndexPredicateAnalyzer {
- private Set<String> allowedColumnNames;
+ private final Set<String> udfNames;
+ private final Set<String> allowedColumnNames;
public IndexPredicateAnalyzer() {
udfNames = new HashSet<String>();
+ allowedColumnNames = new HashSet<String>();
}
/**
@@ -88,7 +81,7 @@ public class IndexPredicateAnalyzer
* column names are allowed.)
*/
public void clearAllowedColumnNames() {
- allowedColumnNames = new HashSet<String>();
+ allowedColumnNames.clear();
}
/**
@@ -97,9 +90,6 @@ public class IndexPredicateAnalyzer
* @param columnName name of column to be allowed
*/
public void allowColumnName(String columnName) {
- if (allowedColumnNames == null) {
- clearAllowedColumnNames();
- }
allowedColumnNames.add(columnName);
}
@@ -159,7 +149,6 @@ public class IndexPredicateAnalyzer
List<IndexSearchCondition> searchConditions,
Object... nodeOutputs) {
- expr = (ExprNodeGenericFuncDesc) expr;
if (FunctionRegistry.isOpAnd(expr)) {
assert(nodeOutputs.length == 2);
ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
@@ -179,40 +168,31 @@ public class IndexPredicateAnalyzer
residuals);
}
- String udfName;
- if (expr.getGenericUDF() instanceof GenericUDFBridge) {
- GenericUDFBridge func = (GenericUDFBridge) expr.getGenericUDF();
- udfName = func.getUdfName();
- } else {
- udfName = expr.getGenericUDF().getClass().getName();
+ GenericUDF genericUDF = expr.getGenericUDF();
+ if (!(genericUDF instanceof GenericUDFBaseCompare)) {
+ return expr;
}
- if (!udfNames.contains(udfName)) {
+ ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
+ ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
+ ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
+ if (extracted == null) {
return expr;
}
-
- ExprNodeDesc child1 = extractConstant((ExprNodeDesc) nodeOutputs[0]);
- ExprNodeDesc child2 = extractConstant((ExprNodeDesc) nodeOutputs[1]);
- ExprNodeColumnDesc columnDesc = null;
- ExprNodeConstantDesc constantDesc = null;
- if ((child1 instanceof ExprNodeColumnDesc)
- && (child2 instanceof ExprNodeConstantDesc)) {
- // COL <op> CONSTANT
- columnDesc = (ExprNodeColumnDesc) child1;
- constantDesc = (ExprNodeConstantDesc) child2;
- } else if ((child2 instanceof ExprNodeColumnDesc)
- && (child1 instanceof ExprNodeConstantDesc)) {
- // CONSTANT <op> COL
- columnDesc = (ExprNodeColumnDesc) child2;
- constantDesc = (ExprNodeConstantDesc) child1;
+ if (extracted.length > 2) {
+ genericUDF = genericUDF.flip();
}
- if (columnDesc == null) {
+
+ String udfName = genericUDF.getUdfName();
+ if (!udfNames.contains(genericUDF.getUdfName())) {
return expr;
}
- if (allowedColumnNames != null) {
- if (!allowedColumnNames.contains(columnDesc.getColumn())) {
- return expr;
- }
+
+ ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) extracted[0];
+ ExprNodeConstantDesc constantDesc = (ExprNodeConstantDesc) extracted[1];
+ if (!allowedColumnNames.contains(columnDesc.getColumn())) {
+ return expr;
}
+
searchConditions.add(
new IndexSearchCondition(
columnDesc,
@@ -225,55 +205,6 @@ public class IndexPredicateAnalyzer
return null;
}
- private ExprNodeDesc extractConstant(ExprNodeDesc expr) {
- if (!(expr instanceof ExprNodeGenericFuncDesc)) {
- return expr;
- }
- ExprNodeConstantDesc folded = foldConstant(((ExprNodeGenericFuncDesc) expr));
- return folded == null ? expr : folded;
- }
-
- private ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) {
- GenericUDF udf = func.getGenericUDF();
- if (!FunctionRegistry.isDeterministic(udf) || FunctionRegistry.isStateful(udf)) {
- return null;
- }
- try {
- // If the UDF depends on any external resources, we can't fold because the
- // resources may not be available at compile time.
- if (udf instanceof GenericUDFBridge) {
- UDF internal = ReflectionUtils.newInstance(((GenericUDFBridge) udf).getUdfClass(), null);
- if (internal.getRequiredFiles() != null || internal.getRequiredJars() != null) {
- return null;
- }
- } else {
- if (udf.getRequiredFiles() != null || udf.getRequiredJars() != null) {
- return null;
- }
- }
-
- for (ExprNodeDesc child : func.getChildren()) {
- if (child instanceof ExprNodeConstantDesc) {
- continue;
- } else if (child instanceof ExprNodeGenericFuncDesc) {
- if (foldConstant((ExprNodeGenericFuncDesc) child) != null) {
- continue;
- }
- }
- return null;
- }
- ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(func);
- ObjectInspector output = evaluator.initialize(null);
-
- Object constant = evaluator.evaluate(null);
- Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
-
- return new ExprNodeConstantDesc(java);
- } catch (Exception e) {
- return null;
- }
- }
-
/**
* Translates search conditions back to ExprNodeDesc form (as
* a left-deep conjunction).
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java Tue Jan 28 05:48:03 2014
@@ -97,7 +97,7 @@ public class BitmapIndexHandler extends
// Build reentrant QL for index query
StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
- String tmpFile = pctx.getContext().getMRTmpFileURI();
+ String tmpFile = pctx.getContext().getMRTmpPath().toUri().toString();
qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM ");
qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM ");
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Tue Jan 28 05:48:03 2014
@@ -170,7 +170,7 @@ public class CompactIndexHandler extends
// Build reentrant QL for index query
StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
- String tmpFile = pctx.getContext().getMRTmpFileURI();
+ String tmpFile = pctx.getContext().getMRTmpPath().toUri().toString();
queryContext.setIndexIntermediateFile(tmpFile);
qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name
qlCommand.append("SELECT `_bucketname` , `_offsets` FROM ");
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Tue Jan 28 05:48:03 2014
@@ -28,7 +28,9 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -114,8 +116,23 @@ public class BucketizedHiveInputFormat<K
Path[] dirs = FileInputFormat.getInputPaths(job);
if (dirs.length == 0) {
- throw new IOException("No input paths specified in job");
+ // on tez we're avoiding duplicating the file info in FileInputFormat.
+ if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
+ try {
+ List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
+ dirs = paths.toArray(new Path[paths.size()]);
+ if (dirs.length == 0) {
+ // if we still don't have any files it's time to fail.
+ throw new IOException("No input paths specified in job");
+ }
+ } catch (Exception e) {
+ throw new IOException("Could not create input paths", e);
+ }
+ } else {
+ throw new IOException("No input paths specified in job");
+ }
}
+
JobConf newjob = new JobConf(job);
ArrayList<InputSplit> result = new ArrayList<InputSplit>();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Tue Jan 28 05:48:03 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.log.PerfLogger;
@@ -271,6 +272,17 @@ public class CombineHiveInputFormat<K ex
mrwork.getAliasToWork();
CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
.getCombineFileInputFormat();
+
+ // on tez we're avoiding duplicating path info since the info will go over
+ // rpc
+ if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
+ try {
+ List<Path> dirs = Utilities.getInputPathsTez(job, mrwork);
+ Utilities.setInputPaths(job, dirs);
+ } catch (Exception e) {
+ throw new IOException("Could not create input paths", e);
+ }
+ }
InputSplit[] splits = null;
if (combine == null) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Tue Jan 28 05:48:03 2014
@@ -380,23 +380,22 @@ public final class HiveFileFormatUtils {
}
if (part == null) {
- String dirStr = dir.toString();
- int dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR);
- int dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR);
- while (dirPathIndex >= 0 && dirStrIndex >= 0) {
- dirStr = dirStr.substring(0, dirStrIndex);
- dirPath = dirPath.substring(0, dirPathIndex);
- //first try full match
- part = pathToPartitionInfo.get(dirStr);
+ Path curPath = new Path(dir.toUri().getPath()).getParent();
+ dir = dir.getParent();
+ while (dir != null) {
+
+ // first try full match
+ part = pathToPartitionInfo.get(dir.toString());
if (part == null) {
- // LOG.warn("exact match not found, try ripping input path's theme and authority");
- part = pathToPartitionInfo.get(dirPath);
+
+ // exact match not found, try ripping input path's scheme and authority
+ part = pathToPartitionInfo.get(curPath.toString());
}
if (part != null) {
break;
}
- dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR);
- dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR);
+ dir = dir.getParent();
+ curPath = curPath.getParent();
}
}
return part;
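For illustration, the rewritten lookup above extracted into a hypothetical standalone helper (name and signature invented here): it walks up the directory tree, trying each ancestor first with and then without its URI scheme/authority, until a PartitionDesc matches:

    import java.util.Map;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.plan.PartitionDesc;

    static PartitionDesc lookupAncestor(Map<String, PartitionDesc> pathToPartitionInfo, Path dir) {
      Path curPath = new Path(dir.toUri().getPath()).getParent(); // scheme/authority stripped
      dir = dir.getParent();
      while (dir != null) {
        // first try the full match, then the scheme/authority-stripped one
        PartitionDesc part = pathToPartitionInfo.get(dir.toString());
        if (part == null) {
          part = pathToPartitionInfo.get(curPath.toString());
        }
        if (part != null) {
          return part;
        }
        dir = dir.getParent();
        curPath = curPath.getParent();
      }
      return null;
    }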
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Jan 28 05:48:03 2014
@@ -33,6 +33,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -300,7 +301,21 @@ public class HiveInputFormat<K extends W
Path[] dirs = FileInputFormat.getInputPaths(job);
if (dirs.length == 0) {
- throw new IOException("No input paths specified in job");
+ // on tez we're avoiding duplicating the file info in FileInputFormat.
+ if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
+ try {
+ List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
+ dirs = paths.toArray(new Path[paths.size()]);
+ if (dirs.length == 0) {
+ // if we still don't have any files it's time to fail.
+ throw new IOException("No input paths specified in job");
+ }
+ } catch (Exception e) {
+ throw new IOException("Could not create input files", e);
+ }
+ } else {
+ throw new IOException("No input paths specified in job");
+ }
}
JobConf newjob = new JobConf(job);
List<InputSplit> result = new ArrayList<InputSplit>();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Jan 28 05:48:03 2014
@@ -299,11 +299,7 @@ public class OrcInputFormat implements
}
private boolean isVectorMode(Configuration conf) {
- if (Utilities.getPlanPath(conf) != null && Utilities
- .getMapRedWork(conf).getMapWork().getVectorMode()) {
- return true;
- }
- return false;
+ return Utilities.isVectorMode(conf);
}
/**
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Jan 28 05:48:03 2014
@@ -1018,6 +1018,23 @@ class RecordReaderImpl implements Record
}
@Override
+ Object nextVector(Object previousVector, long batchSize) throws IOException {
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
+ }
+
+ @Override
void skipRows(long items) throws IOException {
reader.skip(countNonNulls(items));
}
@@ -2174,7 +2191,7 @@ class RecordReaderImpl implements Record
if (sarg == null || rowIndexStride == 0) {
return null;
}
- readRowIndex();
+ readRowIndex(currentStripe);
long rowsInStripe = stripes.get(currentStripe).getNumberOfRows();
int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) /
rowIndexStride);
@@ -2768,8 +2785,18 @@ class RecordReaderImpl implements Record
throw new IllegalArgumentException("Seek after the end of reader range");
}
- private void readRowIndex() throws IOException {
- long offset = stripes.get(currentStripe).getOffset();
+ OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
+ long offset = stripes.get(stripeIndex).getOffset();
+ OrcProto.StripeFooter stripeFooter;
+ OrcProto.RowIndex[] indexes;
+ // if this is the current stripe, use the cached objects.
+ if (stripeIndex == currentStripe) {
+ stripeFooter = this.stripeFooter;
+ indexes = this.indexes;
+ } else {
+ stripeFooter = readStripeFooter(stripes.get(stripeIndex));
+ indexes = new OrcProto.RowIndex[this.indexes.length];
+ }
for(OrcProto.Stream stream: stripeFooter.getStreamsList()) {
if (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX) {
int col = stream.getColumn();
@@ -2784,6 +2811,7 @@ class RecordReaderImpl implements Record
}
offset += stream.getLength();
}
+ return indexes;
}
private void seekToRowEntry(int rowEntry) throws IOException {
@@ -2815,7 +2843,7 @@ class RecordReaderImpl implements Record
currentStripe = rightStripe;
readStripe();
}
- readRowIndex();
+ readRowIndex(currentStripe);
// if we aren't to the right row yet, advance in the stripe.
advanceToNextRow(rowNumber);
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/BlockMergeTask.java Tue Jan 28 05:48:03 2014
@@ -192,7 +192,7 @@ public class BlockMergeTask extends Task
try {
addInputPaths(job, work);
- Utilities.setMapWork(job, work, ctx.getMRTmpFileURI(), true);
+ Utilities.setMapWork(job, work, ctx.getMRTmpPath(), true);
// remove the pwd from conf file so that job tracker doesn't show this
// logs
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/merge/RCFileMergeMapper.java Tue Jan 28 05:48:03 2014
@@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.shims.CombineHiveKey;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.JobConf;
@@ -84,13 +83,12 @@ public class RCFileMergeMapper extends M
listBucketingDepth = HiveConf.getIntVar(job,
HiveConf.ConfVars.HIVEMERGECURRENTJOBCONCATENATELISTBUCKETINGDEPTH);
- String specPath = RCFileBlockMergeOutputFormat.getMergeOutputPath(job)
- .toString();
+ Path specPath = RCFileBlockMergeOutputFormat.getMergeOutputPath(job);
Path tmpPath = Utilities.toTempPath(specPath);
Path taskTmpPath = Utilities.toTaskTempPath(specPath);
updatePaths(tmpPath, taskTmpPath);
try {
- fs = (new Path(specPath)).getFileSystem(job);
+ fs = specPath.getFileSystem(job);
autoDelete = fs.deleteOnExit(outPath);
} catch (IOException e) {
this.exception = true;
@@ -316,7 +314,7 @@ public class RCFileMergeMapper extends M
) throws HiveException, IOException {
FileSystem fs = outputPath.getFileSystem(job);
Path backupPath = backupOutputPath(fs, outputPath, job);
- Utilities.mvFileToFinalPath(outputPath.toUri().toString(), job, success, LOG, dynPartCtx, null,
+ Utilities.mvFileToFinalPath(outputPath, job, success, LOG, dynPartCtx, null,
reporter);
fs.delete(backupPath, true);
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java Tue Jan 28 05:48:03 2014
@@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.io.RCFi
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper;
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.stats.CounterStatsPublisher;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.shims.CombineHiveKey;
@@ -139,11 +140,13 @@ public class PartialScanMapper extends M
throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
}
+ int maxPrefixLength = StatsFactory.getMaxPrefixLength(jc);
// construct key used to store stats in intermediate db
- String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(jc));
- String keyPrefix = Utilities.getHashedStatsPrefix(
- statsAggKeyPrefix, StatsFactory.getMaxPrefixLength(jc), taskID.length());
- String key = keyPrefix + taskID;
+ String key = Utilities.getHashedStatsPrefix(statsAggKeyPrefix, maxPrefixLength);
+ if (!(statsPublisher instanceof CounterStatsPublisher)) {
+ String taskID = Utilities.getTaskIdFromFilename(Utilities.getTaskId(jc));
+ key = Utilities.join(key, taskID);
+ }
// construct statistics to be stored
Map<String, String> statsToPublish = new HashMap<String, String>();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanTask.java Tue Jan 28 05:48:03 2014
@@ -184,7 +184,7 @@ public class PartialScanTask extends Tas
MapredWork mrWork = new MapredWork();
mrWork.setMapWork(work);
- Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpFileURI());
+ Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
// remove the pwd from conf file so that job tracker doesn't show this
// logs
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateMapper.java Tue Jan 28 05:48:03 2014
@@ -34,7 +34,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.shims.CombineHiveKey;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.JobConf;
@@ -234,7 +233,7 @@ public class ColumnTruncateMapper extend
) throws HiveException, IOException {
FileSystem fs = outputPath.getFileSystem(job);
Path backupPath = backupOutputPath(fs, outputPath, job);
- Utilities.mvFileToFinalPath(outputPath.toUri().toString(), job, success, LOG, dynPartCtx, null,
+ Utilities.mvFileToFinalPath(outputPath, job, success, LOG, dynPartCtx, null,
reporter);
fs.delete(backupPath, true);
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java Tue Jan 28 05:48:03 2014
@@ -168,7 +168,7 @@ public class ColumnTruncateTask extends
MapredWork mrWork = new MapredWork();
mrWork.setMapWork(work);
- Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpFileURI());
+ Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
// remove the pwd from conf file so that job tracker doesn't show this
// logs
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Jan 28 05:48:03 2014
@@ -85,6 +85,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
+import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant;
import org.apache.hadoop.hive.ql.session.SessionState;
@@ -1195,13 +1196,13 @@ public class Hive {
Partition oldPart = getPartition(tbl, partSpec, false);
Path oldPartPath = null;
if(oldPart != null) {
- oldPartPath = oldPart.getPartitionPath();
+ oldPartPath = oldPart.getDataLocation();
}
Path newPartPath = null;
if (inheritTableSpecs) {
- Path partPath = new Path(tbl.getDataLocation().getPath(),
+ Path partPath = new Path(tbl.getDataLocation(),
Warehouse.makePartPath(partSpec));
newPartPath = new Path(loadPath.toUri().getScheme(), loadPath.toUri().getAuthority(),
partPath.toUri().getPath());
@@ -1227,7 +1228,7 @@ public class Hive {
if (replace) {
Hive.replaceFiles(loadPath, newPartPath, oldPartPath, getConf());
} else {
- FileSystem fs = FileSystem.get(tbl.getDataLocation(), getConf());
+ FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
Hive.copyFiles(conf, loadPath, newPartPath, fs);
}
@@ -1469,92 +1470,72 @@ private void constructOneLBLocationMap(F
* @throws HiveException
* if table doesn't exist or partition already exists
*/
- public Partition createPartition(Table tbl, Map<String, String> partSpec)
- throws HiveException {
- return createPartition(tbl, partSpec, null, null, null, null, -1,
- null, null, null, null, null);
+ public Partition createPartition(Table tbl, Map<String, String> partSpec) throws HiveException {
+ try {
+ return new Partition(tbl, getMSC().add_partition(
+ Partition.createMetaPartitionObject(tbl, partSpec, null)));
+ } catch (Exception e) {
+ LOG.error(StringUtils.stringifyException(e));
+ throw new HiveException(e);
+ }
}
- /**
- * Creates a partition
- *
- * @param tbl
- * table for which partition needs to be created
- * @param partSpec
- * partition keys and their values
- * @param location
- * location of this partition
- * @param partParams
- * partition parameters
- * @param inputFormat the inputformat class
- * @param outputFormat the outputformat class
- * @param numBuckets the number of buckets
- * @param cols the column schema
- * @param serializationLib the serde class
- * @param serdeParams the serde parameters
- * @param bucketCols the bucketing columns
- * @param sortCols sort columns and order
- *
- * @return created partition object
- * @throws HiveException
- * if table doesn't exist or partition already exists
- */
- public Partition createPartition(Table tbl, Map<String, String> partSpec,
- Path location, Map<String, String> partParams, String inputFormat, String outputFormat,
- int numBuckets, List<FieldSchema> cols,
- String serializationLib, Map<String, String> serdeParams,
- List<String> bucketCols, List<Order> sortCols) throws HiveException {
-
- org.apache.hadoop.hive.metastore.api.Partition partition = null;
-
- for (FieldSchema field : tbl.getPartCols()) {
- String val = partSpec.get(field.getName());
- if (val == null || val.length() == 0) {
- throw new HiveException("add partition: Value for key "
- + field.getName() + " is null or empty");
+ public List<Partition> createPartitions(AddPartitionDesc addPartitionDesc) throws HiveException {
+ Table tbl = getTable(addPartitionDesc.getDbName(), addPartitionDesc.getTableName());
+ int size = addPartitionDesc.getPartitionCount();
+ List<org.apache.hadoop.hive.metastore.api.Partition> in =
+ new ArrayList<org.apache.hadoop.hive.metastore.api.Partition>(size);
+ for (int i = 0; i < size; ++i) {
+ in.add(convertAddSpecToMetaPartition(tbl, addPartitionDesc.getPartition(i)));
+ }
+ List<Partition> out = new ArrayList<Partition>();
+ try {
+ // TODO: normally, the result is not necessary; might make sense to pass false
+ for (org.apache.hadoop.hive.metastore.api.Partition outPart
+ : getMSC().add_partitions(in, addPartitionDesc.isIfNotExists(), true)) {
+ out.add(new Partition(tbl, outPart));
}
- }
-
- try {
- Partition tmpPart = new Partition(tbl, partSpec, location);
- // No need to clear DDL_TIME in parameters since we know it's
- // not populated on construction.
- org.apache.hadoop.hive.metastore.api.Partition inPart
- = tmpPart.getTPartition();
- if (partParams != null) {
- inPart.setParameters(partParams);
- }
- if (inputFormat != null) {
- inPart.getSd().setInputFormat(inputFormat);
- }
- if (outputFormat != null) {
- inPart.getSd().setOutputFormat(outputFormat);
- }
- if (numBuckets != -1) {
- inPart.getSd().setNumBuckets(numBuckets);
- }
- if (cols != null) {
- inPart.getSd().setCols(cols);
- }
- if (serializationLib != null) {
- inPart.getSd().getSerdeInfo().setSerializationLib(serializationLib);
- }
- if (serdeParams != null) {
- inPart.getSd().getSerdeInfo().setParameters(serdeParams);
- }
- if (bucketCols != null) {
- inPart.getSd().setBucketCols(bucketCols);
- }
- if (sortCols != null) {
- inPart.getSd().setSortCols(sortCols);
- }
- partition = getMSC().add_partition(inPart);
} catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
}
+ return out;
+ }
- return new Partition(tbl, partition);
+ private static org.apache.hadoop.hive.metastore.api.Partition convertAddSpecToMetaPartition(
+ Table tbl, AddPartitionDesc.OnePartitionDesc addSpec) throws HiveException {
+ Path location = addSpec.getLocation() != null
+ ? new Path(tbl.getPath(), addSpec.getLocation()) : null;
+ org.apache.hadoop.hive.metastore.api.Partition part =
+ Partition.createMetaPartitionObject(tbl, addSpec.getPartSpec(), location);
+ if (addSpec.getPartParams() != null) {
+ part.setParameters(addSpec.getPartParams());
+ }
+ if (addSpec.getInputFormat() != null) {
+ part.getSd().setInputFormat(addSpec.getInputFormat());
+ }
+ if (addSpec.getOutputFormat() != null) {
+ part.getSd().setOutputFormat(addSpec.getOutputFormat());
+ }
+ if (addSpec.getNumBuckets() != -1) {
+ part.getSd().setNumBuckets(addSpec.getNumBuckets());
+ }
+ if (addSpec.getCols() != null) {
+ part.getSd().setCols(addSpec.getCols());
+ }
+ if (addSpec.getSerializationLib() != null) {
+ part.getSd().getSerdeInfo().setSerializationLib(addSpec.getSerializationLib());
+ }
+ if (addSpec.getSerdeParams() != null) {
+ part.getSd().getSerdeInfo().setParameters(addSpec.getSerdeParams());
+ }
+ if (addSpec.getBucketCols() != null) {
+ part.getSd().setBucketCols(addSpec.getBucketCols());
+ }
+ if (addSpec.getSortCols() != null) {
+ part.getSd().setSortCols(addSpec.getSortCols());
+ }
+ return part;
}
public Partition getPartition(Table tbl, Map<String, String> partSpec,
@@ -1590,15 +1571,13 @@ private void constructOneLBLocationMap(F
*/
public Partition getPartition(Table tbl, Map<String, String> partSpec,
boolean forceCreate, String partPath, boolean inheritTableSpecs) throws HiveException {
- if (!tbl.isValidSpec(partSpec)) {
- throw new HiveException("Invalid partition: " + partSpec);
- }
+ tbl.validatePartColumnNames(partSpec, true);
List<String> pvals = new ArrayList<String>();
for (FieldSchema field : tbl.getPartCols()) {
String val = partSpec.get(field.getName());
// enable dynamic partitioning
- if (val == null && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING)
- || val.length() == 0) {
+ if ((val == null && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONING))
+ || (val != null && val.length() == 0)) {
throw new HiveException("get partition: Value for key "
+ field.getName() + " is null or empty");
} else if (val != null){
@@ -2395,6 +2374,7 @@ private void constructOneLBLocationMap(F
private IMetaStoreClient createMetaStoreClient() throws MetaException {
HiveMetaHookLoader hookLoader = new HiveMetaHookLoader() {
+ @Override
public HiveMetaHook getHook(
org.apache.hadoop.hive.metastore.api.Table tbl)
throws MetaException {
@@ -2534,20 +2514,8 @@ private void constructOneLBLocationMap(F
}
public Table newTable(String tableName) throws HiveException {
- String[] names = getQualifiedNames(tableName);
- switch (names.length) {
- case 2:
- return new Table(names[0], names[1]);
- case 1:
- return new Table(SessionState.get().getCurrentDatabase(), names[0]);
- default:
- try{
- throw new HiveException("Invalid table name: " + tableName);
- }catch(Exception e) {
- e.printStackTrace();
- }
- throw new HiveException("Invalid table name: " + tableName);
- }
+ String[] names = Utilities.getDbTableName(tableName);
+ return new Table(names[0], names[1]);
}
public String getDelegationToken(String owner, String renewer)
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java Tue Jan 28 05:48:03 2014
@@ -254,7 +254,7 @@ public class HiveMetaStoreChecker {
// most likely the user specified an invalid partition
continue;
}
- Path partPath = partition.getPartitionPath();
+ Path partPath = partition.getDataLocation();
fs = partPath.getFileSystem(conf);
if (!fs.exists(partPath)) {
PartitionResult pr = new PartitionResult();
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java Tue Jan 28 05:48:03 2014
@@ -29,6 +29,9 @@ import org.apache.hadoop.hive.ql.securit
import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.DefaultHiveAuthorizerFactory;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ReflectionUtils;
@@ -334,6 +337,23 @@ public final class HiveUtils {
public static HiveAuthorizationProvider getAuthorizeProviderManager(
Configuration conf, HiveConf.ConfVars authorizationProviderConfKey,
HiveAuthenticationProvider authenticator) throws HiveException {
+ return getAuthorizeProviderManager(conf, authorizationProviderConfKey, authenticator, false);
+ }
+
+ /**
+ * Create a new instance of HiveAuthorizationProvider
+ * @param conf
+ * @param authorizationProviderConfKey
+ * @param authenticator
+ * @param nullIfOtherClass - return null if configuration
+ * does not point to a HiveAuthorizationProvider subclass
+ * @return new instance of HiveAuthorizationProvider
+ * @throws HiveException
+ */
+ @SuppressWarnings("unchecked")
+ public static HiveAuthorizationProvider getAuthorizeProviderManager(
+ Configuration conf, HiveConf.ConfVars authorizationProviderConfKey,
+ HiveAuthenticationProvider authenticator, boolean nullIfOtherClass) throws HiveException {
String clsStr = HiveConf.getVar(conf, authorizationProviderConfKey);
@@ -343,8 +363,11 @@ public final class HiveUtils {
if (clsStr == null || clsStr.trim().equals("")) {
cls = DefaultHiveAuthorizationProvider.class;
} else {
- cls = (Class<? extends HiveAuthorizationProvider>) Class.forName(
- clsStr, true, JavaUtils.getClassLoader());
+ Class<?> configClass = Class.forName(clsStr, true, JavaUtils.getClassLoader());
+ if(nullIfOtherClass && !HiveAuthorizationProvider.class.isAssignableFrom(configClass) ){
+ return null;
+ }
+ cls = (Class<? extends HiveAuthorizationProvider>)configClass;
}
if (cls != null) {
ret = ReflectionUtils.newInstance(cls, conf);
@@ -356,6 +379,31 @@ public final class HiveUtils {
return ret;
}
+
+ /**
+ * Return HiveAuthorizerFactory used by new authorization plugin interface.
+ * @param conf
+ * @param authorizationProviderConfKey
+ * @return a HiveAuthorizerFactory instance
+ * @throws HiveException if the HiveAuthorizerFactory specified in the configuration could not be loaded
+ */
+ public static HiveAuthorizerFactory getAuthorizerFactory(
+ Configuration conf, HiveConf.ConfVars authorizationProviderConfKey)
+ throws HiveException {
+
+ Class<? extends HiveAuthorizerFactory> cls = conf.getClass(authorizationProviderConfKey.varname,
+ DefaultHiveAuthorizerFactory.class, HiveAuthorizerFactory.class);
+
+ if(cls == null){
+ //should not happen as default value is set
+ throw new HiveException("Configuration value " + authorizationProviderConfKey.varname
+ + " is not set to valid HiveAuthorizerFactory subclass" );
+ }
+
+ HiveAuthorizerFactory authFactory = ReflectionUtils.newInstance(cls, conf);
+ return authFactory;
+ }
+
@SuppressWarnings("unchecked")
public static HiveAuthenticationProvider getAuthenticator(
Configuration conf, HiveConf.ConfVars authenticatorConfKey
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Tue Jan 28 05:48:03 2014
@@ -19,7 +19,6 @@
package org.apache.hadoop.hive.ql.metadata;
import java.io.Serializable;
-import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -74,7 +73,6 @@ public class Partition implements Serial
private Deserializer deserializer;
private Class<? extends HiveOutputFormat> outputFormatClass;
private Class<? extends InputFormat> inputFormatClass;
- private URI uri;
/**
* @return The values of the partition
@@ -120,50 +118,53 @@ public class Partition implements Serial
* @throws HiveException
* Thrown if we could not create the partition.
*/
- public Partition(Table tbl, Map<String, String> partSpec, Path location)
- throws HiveException {
+ public Partition(Table tbl, Map<String, String> partSpec, Path location) throws HiveException {
+ initialize(tbl, createMetaPartitionObject(tbl, partSpec, location));
+ }
+ public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject(
+ Table tbl, Map<String, String> partSpec, Path location) throws HiveException {
List<String> pvals = new ArrayList<String>();
for (FieldSchema field : tbl.getPartCols()) {
String val = partSpec.get(field.getName());
- if (val == null) {
- throw new HiveException(
- "partition spec is invalid. field.getName() does not exist in input.");
+ if (val == null || val.isEmpty()) {
+ throw new HiveException("partition spec is invalid; field "
+ + field.getName() + " does not exist or is empty");
}
pvals.add(val);
}
- org.apache.hadoop.hive.metastore.api.Partition tpart = new org.apache.hadoop.hive.metastore.api.Partition();
+ org.apache.hadoop.hive.metastore.api.Partition tpart =
+ new org.apache.hadoop.hive.metastore.api.Partition();
tpart.setDbName(tbl.getDbName());
tpart.setTableName(tbl.getTableName());
tpart.setValues(pvals);
- if (tbl.isView()) {
- initialize(tbl, tpart);
- return;
+ if (!tbl.isView()) {
+ tpart.setSd(cloneSd(tbl));
+ tpart.getSd().setLocation((location != null) ? location.toString() : null);
}
+ return tpart;
+ }
+  /**
+   * We already have methods that clone stuff using XML or Kryo; this one
+   * deep-copies the table's StorageDescriptor using Thrift serialization.
+   */
+ public static StorageDescriptor cloneSd(Table tbl) throws HiveException {
StorageDescriptor sd = new StorageDescriptor();
try {
// replace with THRIFT-138
TMemoryBuffer buffer = new TMemoryBuffer(1024);
TBinaryProtocol prot = new TBinaryProtocol(buffer);
tbl.getTTable().getSd().write(prot);
-
sd.read(prot);
} catch (TException e) {
      LOG.error("Could not create a copy of StorageDescriptor");
      throw new HiveException("Could not create a copy of StorageDescriptor", e);
}
-
- tpart.setSd(sd);
- if (location != null) {
- tpart.getSd().setLocation(location.toString());
- } else {
- tpart.getSd().setLocation(null);
- }
-
- initialize(tbl, tpart);
+ return sd;
}
/**
@@ -227,11 +228,11 @@ public class Partition implements Serial
}
public Path[] getPath() {
- Path[] ret = new Path[]{getPartitionPath()};
+ Path[] ret = new Path[]{getDataLocation()};
return ret;
}
- public Path getPartitionPath() {
+ public Path getDataLocation() {
if (table.isPartitioned()) {
return new Path(tPartition.getSd().getLocation());
} else {
@@ -239,13 +240,6 @@ public class Partition implements Serial
}
}
- final public URI getDataLocation() {
- if (uri == null) {
- uri = getPartitionPath().toUri();
- }
- return uri;
- }
-
final public Deserializer getDeserializer() {
if (deserializer == null) {
try {
@@ -379,9 +373,8 @@ public class Partition implements Serial
try {
// Previously, this got the filesystem of the Table, which could be
// different from the filesystem of the partition.
- FileSystem fs = FileSystem.get(getPartitionPath().toUri(), Hive.get()
- .getConf());
- String pathPattern = getPartitionPath().toString();
+ FileSystem fs = getDataLocation().getFileSystem(Hive.get().getConf());
+ String pathPattern = getDataLocation().toString();
if (getBucketCount() > 0) {
pathPattern = pathPattern + "/*";
}
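The cloneSd helper above deep-copies a Thrift struct by writing it to an in-memory transport and reading it back into a fresh instance; the same round trip works for any generated Thrift type. A condensed sketch of just that step (error handling trimmed):

    import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    import org.apache.thrift.TException;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TMemoryBuffer;

    public final class ThriftCloneSketch {
      /** Deep-copies a StorageDescriptor via a serialize/deserialize round trip. */
      public static StorageDescriptor deepCopy(StorageDescriptor src) throws TException {
        TMemoryBuffer buffer = new TMemoryBuffer(1024); // grows beyond 1024 as needed
        TBinaryProtocol prot = new TBinaryProtocol(buffer);
        src.write(prot);                      // serialize the source struct
        StorageDescriptor copy = new StorageDescriptor();
        copy.read(prot);                      // rehydrate into the new instance
        return copy;
      }
    }

Note that Thrift-generated classes also ship a copy constructor (new StorageDescriptor(src)) that performs a deep copy directly; the round trip above mirrors the commit's approach, pending the THRIFT-138 replacement noted in the code.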
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Tue Jan 28 05:48:03 2014
@@ -20,15 +20,16 @@ package org.apache.hadoop.hive.ql.metada
import java.io.IOException;
import java.io.Serializable;
-import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -36,7 +37,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.ProtectMode;
import org.apache.hadoop.hive.metastore.TableType;
@@ -47,9 +47,12 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
@@ -83,7 +86,7 @@ public class Table implements Serializab
private Deserializer deserializer;
private Class<? extends HiveOutputFormat> outputFormatClass;
private Class<? extends InputFormat> inputFormatClass;
- private URI uri;
+ private Path path;
private HiveStorageHandler storageHandler;
/**
@@ -251,14 +254,11 @@ public class Table implements Serializab
return tTable.getTableName();
}
- final public URI getDataLocation() {
- if (uri == null) {
- Path path = getPath();
- if (path != null) {
- uri = path.toUri();
- }
+ final public Path getDataLocation() {
+ if (path == null) {
+ path = getPath();
}
- return uri;
+ return path;
}
final public Deserializer getDeserializer() {
@@ -326,8 +326,22 @@ public class Table implements Serializab
}
c = getStorageHandler().getOutputFormatClass();
} else {
- c = Class.forName(className, true,
- JavaUtils.getClassLoader());
+        // HivePassThroughOutputFormat is a marker for the storage handler's
+        // real output format class
+        if (className.equals(
+            HivePassThroughOutputFormat.HIVE_PASSTHROUGH_OF_CLASSNAME)) {
+          if (getStorageHandler() != null) {
+            // get the storage handler's real output format class
+            c = getStorageHandler().getOutputFormatClass();
+          } else {
+            // should not happen
+            return null;
+          }
+        } else {
+          c = Class.forName(className, true,
+              JavaUtils.getClassLoader());
+        }
}
if (!HiveOutputFormat.class.isAssignableFrom(c)) {
if (getStorageHandler() != null) {
@@ -347,35 +361,34 @@ public class Table implements Serializab
return outputFormatClass;
}
- final public boolean isValidSpec(Map<String, String> spec)
- throws HiveException {
-
- // TODO - types need to be checked.
+ final public void validatePartColumnNames(
+ Map<String, String> spec, boolean shouldBeFull) throws SemanticException {
List<FieldSchema> partCols = tTable.getPartitionKeys();
if (partCols == null || (partCols.size() == 0)) {
if (spec != null) {
- throw new HiveException(
- "table is not partitioned but partition spec exists: " + spec);
- } else {
- return true;
+ throw new SemanticException("table is not partitioned but partition spec exists: " + spec);
}
- }
-
- if ((spec == null) || (spec.size() != partCols.size())) {
- throw new HiveException(
- "table is partitioned but partition spec is not specified or"
- + " does not fully match table partitioning: "
- + spec);
- }
-
- for (FieldSchema field : partCols) {
- if (spec.get(field.getName()) == null) {
- throw new HiveException(field.getName()
- + " not found in table's partition spec: " + spec);
+ return;
+ } else if (spec == null) {
+ if (shouldBeFull) {
+ throw new SemanticException("table is partitioned but partition spec is not specified");
}
+ return;
+ }
+ int columnsFound = 0;
+ for (FieldSchema fs : partCols) {
+ if (spec.containsKey(fs.getName())) {
+ ++columnsFound;
+ }
+ if (columnsFound == spec.size()) break;
+ }
+ if (columnsFound < spec.size()) {
+ throw new SemanticException("Partition spec " + spec + " contains non-partition columns");
+ }
+ if (shouldBeFull && (spec.size() != partCols.size())) {
+ throw new SemanticException("partition spec " + spec
+ + " doesn't contain all (" + partCols.size() + ") partition columns");
}
-
- return true;
}
public void setProperty(String name, String value) {
@@ -499,13 +512,13 @@ public class Table implements Serializab
return bcols.get(0);
}
- public void setDataLocation(URI uri) {
- this.uri = uri;
- tTable.getSd().setLocation(uri.toString());
+ public void setDataLocation(Path path) {
+ this.path = path;
+ tTable.getSd().setLocation(path.toString());
}
public void unsetDataLocation() {
- this.uri = null;
+ this.path = null;
tTable.getSd().unsetLocation();
}
@@ -636,7 +649,7 @@ public class Table implements Serializab
* Source directory
*/
protected void replaceFiles(Path srcf) throws HiveException {
- Path tableDest = new Path(getDataLocation().getPath());
+ Path tableDest = getPath();
Hive.replaceFiles(srcf, tableDest, tableDest, Hive.get().getConf());
}
@@ -649,8 +662,8 @@ public class Table implements Serializab
protected void copyFiles(Path srcf) throws HiveException {
FileSystem fs;
try {
- fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
- Hive.copyFiles(Hive.get().getConf(), srcf, new Path(getDataLocation().getPath()), fs);
+ fs = getDataLocation().getFileSystem(Hive.get().getConf());
+ Hive.copyFiles(Hive.get().getConf(), srcf, getPath(), fs);
} catch (IOException e) {
throw new HiveException("addFiles: filesystem error in check phase", e);
}
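A pattern repeated throughout this commit is replacing FileSystem.get(uri, conf) with path.getFileSystem(conf), so the filesystem is always resolved from the path's own scheme and authority and can never be paired with the wrong path. A minimal sketch (the file:// path is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class PathFsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // An hdfs:// path resolves to HDFS, a file:// path to the local
        // filesystem; the Path itself decides, not a separately tracked URI.
        Path tableDir = new Path("file:///tmp/warehouse/t1");
        FileSystem fs = tableDir.getFileSystem(conf);
        System.out.println(fs.getUri() + " serves " + tableDir);
      }
    }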
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java Tue Jan 28 05:48:03 2014
@@ -19,14 +19,15 @@
package org.apache.hadoop.hive.ql.metadata.formatting;
import java.io.DataOutputStream;
-import java.io.OutputStream;
import java.io.IOException;
+import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -39,8 +40,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.shims.ShimLoader;
import org.codehaus.jackson.map.ObjectMapper;
/**
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java Tue Jan 28 05:48:03 2014
@@ -42,8 +42,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.shims.ShimLoader;
/**
* Format table and index information for human readability using
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Tue Jan 28 05:48:03 2014
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.optimizer;
import java.io.IOException;
-import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -77,10 +76,10 @@ abstract public class AbstractBucketJoin
Object... nodeOutputs) throws SemanticException;
private static List<String> getBucketFilePathsOfPartition(
- URI location, ParseContext pGraphContext) throws SemanticException {
+ Path location, ParseContext pGraphContext) throws SemanticException {
List<String> fileNames = new ArrayList<String>();
try {
- FileSystem fs = FileSystem.get(location, pGraphContext.getConf());
+ FileSystem fs = location.getFileSystem(pGraphContext.getConf());
FileStatus[] files = fs.listStatus(new Path(location.toString()));
if (files != null) {
for (FileStatus file : files) {
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java Tue Jan 28 05:48:03 2014
@@ -185,9 +185,9 @@ public class GenMRTableScan1 implements
aggregationKey += Warehouse.makePartPath(part.getSpec());
} catch (MetaException e) {
throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(
- part.getPartitionPath().toString() + e.getMessage()));
+ part.getDataLocation().toString() + e.getMessage()));
}
- inputPaths.add(part.getPartitionPath());
+ inputPaths.add(part.getDataLocation());
break;
default:
assert false;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Tue Jan 28 05:48:03 2014
@@ -22,6 +22,7 @@ import java.io.Serializable;
import java.util.Map;
import java.util.Stack;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -124,7 +125,7 @@ public class GenMRUnion1 implements Node
// generate the temporary file
Context baseCtx = parseCtx.getContext();
- String taskTmpDir = baseCtx.getMRTmpFileURI();
+ Path taskTmpDir = baseCtx.getMRTmpPath();
// Create the temporary file, its corresponding FileSinkOperator, and
// its corresponding TableScanOperator.
@@ -132,7 +133,7 @@ public class GenMRUnion1 implements Node
GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
// Add the path to alias mapping
- uCtxTask.addTaskTmpDir(taskTmpDir);
+ uCtxTask.addTaskTmpDir(taskTmpDir.toUri().toString());
uCtxTask.addTTDesc(tt_desc);
uCtxTask.addListTopOperators(tableScanOp);
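One subtlety behind the addTaskTmpDir change above: Path.toString() and Path.toUri().toString() are not interchangeable once a path contains characters that need URI escaping, which is presumably why the alias key keeps the URI form. A small sketch of the difference (the scratch-dir name is illustrative):

    import org.apache.hadoop.fs.Path;

    public class PathStringSketch {
      public static void main(String[] args) {
        Path p = new Path("/tmp/scratch dir/-mr-10002");
        System.out.println(p.toString());         // /tmp/scratch dir/-mr-10002
        System.out.println(p.toUri().toString()); // /tmp/scratch%20dir/-mr-10002
      }
    }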
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Tue Jan 28 05:48:03 2014
@@ -946,7 +946,7 @@ public final class GenMapRedUtils {
*/
protected static TableScanOperator createTemporaryFile(
Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child,
- String taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) {
+ Path taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) {
// Create a FileSinkOperator for the file name of taskTmpDir
boolean compressIntermediate =
@@ -1008,7 +1008,7 @@ public final class GenMapRedUtils {
// Generate the temporary file name
Context baseCtx = parseCtx.getContext();
- String taskTmpDir = baseCtx.getMRTmpFileURI();
+ Path taskTmpDir = baseCtx.getMRTmpPath();
Operator<? extends OperatorDesc> parent = op.getParentOperators().get(0);
TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
@@ -1023,7 +1023,7 @@ public final class GenMapRedUtils {
opProcCtx.getMapCurrCtx();
mapCurrCtx.put(tableScanOp, new GenMapRedCtx(childTask, null));
- String streamDesc = taskTmpDir;
+ String streamDesc = taskTmpDir.toUri().toString();
MapredWork cplan = (MapredWork) childTask.getWork();
if (needsTagging(cplan.getReduceWork())) {
@@ -1055,7 +1055,7 @@ public final class GenMapRedUtils {
}
// Add the path to alias mapping
- setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
+ setTaskPlan(taskTmpDir.toUri().toString(), streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
opProcCtx.setCurrTopOp(null);
opProcCtx.setCurrAliasId(null);
opProcCtx.setCurrTask(childTask);
@@ -1195,7 +1195,7 @@ public final class GenMapRedUtils {
// Create a FileSink operator
TableDesc ts = (TableDesc) fsInputDesc.getTableInfo().clone();
- FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName.toUri().toString(), ts,
+ FileSinkDesc fsOutputDesc = new FileSinkDesc(finalName, ts,
conf.getBoolVar(ConfVars.COMPRESSRESULT));
FileSinkOperator fsOutput = (FileSinkOperator) OperatorFactory.getAndMakeChild(
fsOutputDesc, inputRS, tsMerge);
@@ -1240,7 +1240,7 @@ public final class GenMapRedUtils {
// 2. Constructing a conditional task consisting of a move task and a map reduce task
//
MoveWork dummyMv = new MoveWork(null, null, null,
- new LoadFileDesc(new Path(fsInputDesc.getFinalDirName()), finalName, true, null, null), false);
+ new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false);
MapWork cplan;
Serializable work;
@@ -1277,7 +1277,7 @@ public final class GenMapRedUtils {
// NOTE: we should gather stats in MR1 rather than MR2 at merge job since we don't
// know if merge MR2 will be triggered at execution time
ConditionalTask cndTsk = GenMapRedUtils.createCondTask(conf, currTask, dummyMv, work,
- fsInputDesc.getFinalDirName());
+ fsInputDesc.getFinalDirName().toString());
// keep the dynamic partition context in conditional task resolver context
ConditionalResolverMergeFilesCtx mrCtx =
@@ -1457,7 +1457,7 @@ public final class GenMapRedUtils {
Operator<? extends OperatorDesc> topOp, FileSinkDesc fsDesc) {
ArrayList<String> aliases = new ArrayList<String>();
- String inputDir = fsDesc.getFinalDirName();
+ String inputDir = fsDesc.getFinalDirName().toString();
TableDesc tblDesc = fsDesc.getTableInfo();
aliases.add(inputDir); // dummy alias: just use the input path
@@ -1483,7 +1483,7 @@ public final class GenMapRedUtils {
public static MapWork createRCFileMergeTask(FileSinkDesc fsInputDesc,
Path finalName, boolean hasDynamicPartitions) throws SemanticException {
- String inputDir = fsInputDesc.getFinalDirName();
+ Path inputDir = fsInputDesc.getFinalDirName();
TableDesc tblDesc = fsInputDesc.getTableInfo();
if (tblDesc.getInputFileFormatClass().equals(RCFileInputFormat.class)) {
@@ -1491,22 +1491,22 @@ public final class GenMapRedUtils {
ArrayList<String> inputDirstr = new ArrayList<String>(1);
if (!hasDynamicPartitions
&& !GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) {
- inputDirs.add(new Path(inputDir));
- inputDirstr.add(inputDir);
+ inputDirs.add(inputDir);
+ inputDirstr.add(inputDir.toString());
}
MergeWork work = new MergeWork(inputDirs, finalName,
hasDynamicPartitions, fsInputDesc.getDynPartCtx());
LinkedHashMap<String, ArrayList<String>> pathToAliases =
new LinkedHashMap<String, ArrayList<String>>();
- pathToAliases.put(inputDir, (ArrayList<String>) inputDirstr.clone());
+ pathToAliases.put(inputDir.toString(), (ArrayList<String>) inputDirstr.clone());
work.setMapperCannotSpanPartns(true);
work.setPathToAliases(pathToAliases);
work.setAliasToWork(
new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
if (hasDynamicPartitions
|| GenMapRedUtils.isSkewedStoredAsDirs(fsInputDesc)) {
- work.getPathToPartitionInfo().put(inputDir,
+ work.getPathToPartitionInfo().put(inputDir.toString(),
new PartitionDesc(tblDesc, null));
}
work.setListBucketingCtx(fsInputDesc.getLbCtx());
@@ -1603,7 +1603,7 @@ public final class GenMapRedUtils {
}
if ((srcDir != null)
- && (srcDir.equals(new Path(fsOp.getConf().getFinalDirName())))) {
+ && (srcDir.equals(fsOp.getConf().getFinalDirName()))) {
return mvTsk;
}
}
@@ -1687,20 +1687,19 @@ public final class GenMapRedUtils {
Path dest = null;
if (chDir) {
- dest = new Path(fsOp.getConf().getFinalDirName());
+ dest = fsOp.getConf().getFinalDirName();
// generate the temporary file
// it must be on the same file system as the current destination
Context baseCtx = parseCtx.getContext();
- String tmpDir = baseCtx.getExternalTmpFileURI(dest.toUri());
+ Path tmpDir = baseCtx.getExternalTmpPath(dest.toUri());
FileSinkDesc fileSinkDesc = fsOp.getConf();
// Change all the linked file sink descriptors
if (fileSinkDesc.isLinkedFileSink()) {
for (FileSinkDesc fsConf:fileSinkDesc.getLinkedFileSinkDesc()) {
- String fileName = Utilities.getFileNameFromDirName(fsConf.getDirName());
fsConf.setParentDir(tmpDir);
- fsConf.setDirName(tmpDir + Path.SEPARATOR + fileName);
+ fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName()));
}
} else {
fileSinkDesc.setDirName(tmpDir);
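The linked-file-sink fix above also trades string concatenation (tmpDir + Path.SEPARATOR + fileName) for the two-argument Path constructor, which resolves a child name against a parent without manual separator bookkeeping. A minimal sketch (names are illustrative):

    import org.apache.hadoop.fs.Path;

    public class PathComposeSketch {
      public static void main(String[] args) {
        Path tmpDir = new Path("hdfs://nn:8020/tmp/hive/scratch");
        // Child resolution handles the separator; no double slashes, no
        // accidental scheme loss from string surgery.
        Path dumpDir = new Path(tmpDir, "-ext-10003");
        System.out.println(dumpDir); // hdfs://nn:8020/tmp/hive/scratch/-ext-10003
      }
    }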
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java Tue Jan 28 05:48:03 2014
@@ -165,8 +165,8 @@ public final class IndexUtils {
Partition part) throws HiveException {
LOG.info("checking index staleness...");
try {
- FileSystem partFs = part.getPartitionPath().getFileSystem(hive.getConf());
- FileStatus partFss = partFs.getFileStatus(part.getPartitionPath());
+ FileSystem partFs = part.getDataLocation().getFileSystem(hive.getConf());
+ FileStatus partFss = partFs.getFileStatus(part.getDataLocation());
String ts = index.getParameters().get(part.getSpec().toString());
if (ts == null) {
return false;
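The staleness check above now asks the partition's own Path for its FileStatus; the index is treated as stale when the data's modification time is newer than the timestamp recorded in the index parameters. A hedged sketch of that comparison (the parameter bookkeeping is simplified away):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;

    public class StalenessSketch {
      /** True when the data under partPath changed after lastIndexedMs. */
      public static boolean isStale(Path partPath, long lastIndexedMs,
          Configuration conf) throws IOException {
        FileStatus status = partPath.getFileSystem(conf).getFileStatus(partPath);
        return status.getModificationTime() > lastIndexedMs;
      }
    }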
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java Tue Jan 28 05:48:03 2014
@@ -319,9 +319,8 @@ public class SamplePruner implements Tra
throws SemanticException {
try {
- FileSystem fs = FileSystem.get(part.getPartitionPath().toUri(), Hive.get()
- .getConf());
- String pathPattern = part.getPartitionPath().toString() + "/*";
+ FileSystem fs = part.getDataLocation().getFileSystem(Hive.get().getConf());
+ String pathPattern = part.getDataLocation().toString() + "/*";
AddPathReturnStatus ret = addPath(fs, pathPattern, sizeLimit, fileLimit, retPathList);
if (ret == null) {
return LimitPruneRetStatus.NotQualify;
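The pruner builds a wildcard pattern (the partition directory plus "/*") and checks the matching files against size and count limits. Hadoop expands such patterns with FileSystem.globStatus; a minimal sketch of the expansion step, assuming an illustrative directory (the limit bookkeeping from addPath is omitted):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GlobSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path partDir = new Path("file:///tmp/warehouse/t1/ds=2014-01-28");
        FileSystem fs = partDir.getFileSystem(conf);
        // globStatus may return null when nothing matches, hence the guard.
        FileStatus[] matches = fs.globStatus(new Path(partDir, "*"));
        if (matches != null) {
          for (FileStatus m : matches) {
            System.out.println(m.getPath() + " (" + m.getLen() + " bytes)");
          }
        }
      }
    }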
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java?rev=1561947&r1=1561946&r2=1561947&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java Tue Jan 28 05:48:03 2014
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -94,7 +95,7 @@ public class SimpleFetchAggregation impl
}
GroupByOperator pGBY = (GroupByOperator) stack.get(stack.size() - 5);
- String fileName = FS.getConf().getFinalDirName();
+ Path fileName = FS.getConf().getFinalDirName();
TableDesc tsDesc = createIntermediateFS(pGBY, fileName);
for (AggregationDesc aggregation : cGBY.getConf().getAggregators()) {
@@ -112,7 +113,7 @@ public class SimpleFetchAggregation impl
return null;
}
- private TableDesc createIntermediateFS(Operator<?> parent, String fileName) {
+ private TableDesc createIntermediateFS(Operator<?> parent, Path fileName) {
TableDesc tsDesc = PlanUtils.getIntermediateFileTableDesc(PlanUtils
.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));