Posted to commits@hive.apache.org by ha...@apache.org on 2016/07/16 18:42:58 UTC
[1/2] hive git commit: HIVE-12244: Refactor code to avoid comparing Strings and compare Paths instead (Zoltan Haindrich via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 70a972205 -> 47b5b5cdb
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java
index ded9231..405c3ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java
@@ -290,10 +290,10 @@ public class GenSparkSkewJoinProcessor {
} else {
path = smallTblDirs.get(tags[j]);
}
- mapWork.getPathToAliases().put(path.toString(), aliases);
+ mapWork.addPathToAlias(path, aliases);
mapWork.getAliasToWork().put(alias, tableScan);
PartitionDesc partitionDesc = new PartitionDesc(tableDescList.get(tags[j]), null);
- mapWork.getPathToPartitionInfo().put(path.toString(), partitionDesc);
+ mapWork.addPathToPartitionInfo(path, partitionDesc);
mapWork.getAliasToPartnInfo().put(alias, partitionDesc);
mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
}
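
This first hunk shows the pattern the whole patch repeats: instead of flattening a Path to a String to use as a map key, call sites hand the Path itself to new MapWork helpers (addPathToAlias, addPathToPartitionInfo; both introduced in the MapWork.java hunk further down). A minimal before/after sketch, assuming mapWork, path, aliases, and partitionDesc are locals like the ones in this hunk:

    // Before: the Path is stringified, so two spellings of the same
    // location can end up as two distinct keys in the map.
    mapWork.getPathToAliases().put(path.toString(), aliases);
    mapWork.getPathToPartitionInfo().put(path.toString(), partitionDesc);

    // After: the Path itself is the key. org.apache.hadoop.fs.Path
    // normalizes its input and defines equals()/hashCode(), so lookups
    // no longer depend on how the path string happened to be written.
    mapWork.addPathToAlias(path, aliases);
    mapWork.addPathToPartitionInfo(path, partitionDesc);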
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
index 4d50c64..1ab7137 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
@@ -418,8 +418,7 @@ public class LlapDecider implements PhysicalPlanResolver {
}
private boolean checkInputsVectorized(MapWork mapWork) {
- for (String path : mapWork.getPathToPartitionInfo().keySet()) {
- PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path);
+ for (PartitionDesc pd : mapWork.getPathToPartitionInfo().values()) {
List<Class<?>> interfaceList =
Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
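
The LlapDecider change is an iteration cleanup rather than a type change: when only the values of pathToPartitionInfo are needed, iterating values() directly drops the per-entry lookup and the key variable. A sketch of the idiom over any Map<Path, PartitionDesc>:

    // Before: iterate the keys, then look each value up again.
    for (Path path : pathToPartitionInfo.keySet()) {
      PartitionDesc pd = pathToPartitionInfo.get(path);
      // ... use pd ...
    }

    // After: iterate the values directly.
    for (PartitionDesc pd : pathToPartitionInfo.values()) {
      // ... use pd ...
    }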
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
index 0b82596..1f21428 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
@@ -96,7 +96,7 @@ public class NullScanTaskDispatcher implements Dispatcher {
return desc;
}
- private void processAlias(MapWork work, String path, ArrayList<String> aliasesAffected,
+ private void processAlias(MapWork work, Path path, ArrayList<String> aliasesAffected,
ArrayList<String> aliases) {
// the aliases that are allowed to map to a null scan.
ArrayList<String> allowed = new ArrayList<String>();
@@ -111,13 +111,12 @@ public class NullScanTaskDispatcher implements Dispatcher {
// Prefix partition with something to avoid it being a hidden file.
Path fakePath = new Path(NullScanFileSystem.getBase() + newPartition.getTableName()
+ "/part" + encode(newPartition.getPartSpec()));
- String fakeStr = fakePath.toString();
- work.getPathToPartitionInfo().put(fakeStr, newPartition);
- work.getPathToAliases().put(fakeStr, new ArrayList<String>(allowed));
+ work.addPathToPartitionInfo(fakePath, newPartition);
+ work.addPathToAlias(fakePath, new ArrayList<String>(allowed));
aliasesAffected.removeAll(allowed);
if (aliasesAffected.isEmpty()) {
- work.getPathToAliases().remove(path);
- work.getPathToPartitionInfo().remove(path);
+ work.removePathToAlias(path);
+ work.removePathToPartitionInfo(path);
}
}
}
@@ -132,14 +131,14 @@ public class NullScanTaskDispatcher implements Dispatcher {
tso.getConf().setIsMetadataOnly(true);
}
// group path alias according to work
- LinkedHashMap<String, ArrayList<String>> candidates = new LinkedHashMap<String, ArrayList<String>>();
- for (String path : work.getPaths()) {
+ LinkedHashMap<Path, ArrayList<String>> candidates = new LinkedHashMap<>();
+ for (Path path : work.getPaths()) {
ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
if (aliasesAffected != null && aliasesAffected.size() > 0) {
candidates.put(path, aliasesAffected);
}
}
- for (Entry<String, ArrayList<String>> entry : candidates.entrySet()) {
+ for (Entry<Path, ArrayList<String>> entry : candidates.entrySet()) {
processAlias(work, entry.getKey(), entry.getValue(), aliases);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
index 658717c..9dc6263 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
@@ -76,9 +76,9 @@ public class SortMergeJoinTaskDispatcher extends AbstractJoinTaskDispatcher impl
private void genSMBJoinWork(MapWork currWork, SMBMapJoinOperator smbJoinOp) {
// Remove the paths which are not part of aliasToPartitionInfo
Map<String, PartitionDesc> aliasToPartitionInfo = currWork.getAliasToPartnInfo();
- List<String> removePaths = new ArrayList<String>();
+ List<Path> removePaths = new ArrayList<>();
- for (Map.Entry<String, ArrayList<String>> entry : currWork.getPathToAliases().entrySet()) {
+ for (Map.Entry<Path, ArrayList<String>> entry : currWork.getPathToAliases().entrySet()) {
boolean keepPath = false;
for (String alias : entry.getValue()) {
if (aliasToPartitionInfo.containsKey(alias)) {
@@ -94,10 +94,10 @@ public class SortMergeJoinTaskDispatcher extends AbstractJoinTaskDispatcher impl
}
List<String> removeAliases = new ArrayList<String>();
- for (String removePath : removePaths) {
+ for (Path removePath : removePaths) {
removeAliases.addAll(currWork.getPathToAliases().get(removePath));
- currWork.getPathToAliases().remove(removePath);
- currWork.getPathToPartitionInfo().remove(removePath);
+ currWork.removePathToAlias(removePath);
+ currWork.removePathToPartitionInfo(removePath);
}
for (String alias : removeAliases) {
@@ -119,10 +119,10 @@ public class SortMergeJoinTaskDispatcher extends AbstractJoinTaskDispatcher impl
PartitionDesc partitionInfo = currWork.getAliasToPartnInfo().get(alias);
if (fetchWork.getTblDir() != null) {
- currWork.mergeAliasedInput(alias, fetchWork.getTblDir().toUri().toString(), partitionInfo);
+ currWork.mergeAliasedInput(alias, fetchWork.getTblDir(), partitionInfo);
} else {
for (Path pathDir : fetchWork.getPartDir()) {
- currWork.mergeAliasedInput(alias, pathDir.toUri().toString(), partitionInfo);
+ currWork.mergeAliasedInput(alias, pathDir, partitionInfo);
}
}
}
@@ -265,7 +265,7 @@ public class SortMergeJoinTaskDispatcher extends AbstractJoinTaskDispatcher impl
HashMap<Task<? extends Serializable>, Set<String>> taskToAliases =
new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
// Note that pathToAlias will behave as if the original plan was a join plan
- HashMap<String, ArrayList<String>> pathToAliases = currJoinWork.getMapWork().getPathToAliases();
+ HashMap<Path, ArrayList<String>> pathToAliases = currJoinWork.getMapWork().getPathToAliases();
// generate a map join task for the big table
SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf();
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index bce3853..9802afc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -38,6 +38,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
@@ -710,10 +711,10 @@ public class Vectorizer implements PhysicalPlanResolver {
List<String> tableDataColumnList = null;
List<TypeInfo> tableDataTypeInfoList = null;
- LinkedHashMap<String, ArrayList<String>> pathToAliases = mapWork.getPathToAliases();
- LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo();
- for (Entry<String, ArrayList<String>> entry: pathToAliases.entrySet()) {
- String path = entry.getKey();
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = mapWork.getPathToAliases();
+ LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo();
+ for (Entry<Path, ArrayList<String>> entry: pathToAliases.entrySet()) {
+ Path path = entry.getKey();
List<String> aliases = entry.getValue();
boolean isPresent = (aliases != null && aliases.indexOf(alias) != -1);
if (!isPresent) {
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java
index 5990d17..abc9fcf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSkewJoinProcFactory.java
@@ -166,7 +166,7 @@ public class SparkSkewJoinProcFactory {
streamDesc = origStreamDesc.concat(String.valueOf(++pos));
}
}
- GenMapRedUtils.setTaskPlan(taskTmpDir.toUri().toString(), streamDesc,
+ GenMapRedUtils.setTaskPlan(taskTmpDir, streamDesc,
tableScanOp, mapWork, false, tableDesc);
// insert the new task between current task and its child
@SuppressWarnings("unchecked")
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
index 0d04e84..9256ee1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverCommonJoin.java
@@ -21,14 +21,17 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -50,7 +53,7 @@ public class ConditionalResolverCommonJoin implements ConditionalResolver, Seria
private static final long serialVersionUID = 1L;
private HashMap<Task<? extends Serializable>, Set<String>> taskToAliases;
- HashMap<String, ArrayList<String>> pathToAliases;
+ HashMap<Path, ArrayList<String>> pathToAliases;
HashMap<String, Long> aliasToKnownSize;
private Task<? extends Serializable> commonJoinTask;
@@ -85,11 +88,11 @@ public class ConditionalResolverCommonJoin implements ConditionalResolver, Seria
this.aliasToKnownSize = aliasToKnownSize;
}
- public HashMap<String, ArrayList<String>> getPathToAliases() {
+ public HashMap<Path, ArrayList<String>> getPathToAliases() {
return pathToAliases;
}
- public void setPathToAliases(HashMap<String, ArrayList<String>> pathToAliases) {
+ public void setPathToAliases(final HashMap<Path, ArrayList<String>> pathToAliases) {
this.pathToAliases = pathToAliases;
}
@@ -166,7 +169,6 @@ public class ConditionalResolverCommonJoin implements ConditionalResolver, Seria
Set<String> participants = getParticipants(ctx);
Map<String, Long> aliasToKnownSize = ctx.getAliasToKnownSize();
- Map<String, ArrayList<String>> pathToAliases = ctx.getPathToAliases();
Map<Task<? extends Serializable>, Set<String>> taskToAliases = ctx.getTaskToAliases();
long threshold = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
@@ -212,10 +214,10 @@ public class ConditionalResolverCommonJoin implements ConditionalResolver, Seria
Set<String> aliases = getParticipants(ctx);
Map<String, Long> aliasToKnownSize = ctx.getAliasToKnownSize();
- Map<String, ArrayList<String>> pathToAliases = ctx.getPathToAliases();
+ Map<Path, ArrayList<String>> pathToAliases = ctx.getPathToAliases();
- Set<String> unknownPaths = new HashSet<String>();
- for (Map.Entry<String, ArrayList<String>> entry : pathToAliases.entrySet()) {
+ Set<Path> unknownPaths = new HashSet<>();
+ for (Map.Entry<Path, ArrayList<String>> entry : pathToAliases.entrySet()) {
for (String alias : entry.getValue()) {
if (aliases.contains(alias) && !aliasToKnownSize.containsKey(alias)) {
unknownPaths.add(entry.getKey());
@@ -227,13 +229,12 @@ public class ConditionalResolverCommonJoin implements ConditionalResolver, Seria
Path localTmpDir = ctx.getLocalTmpDir();
// need to compute the input size at runtime, and select the biggest as
// the big table.
- for (String p : unknownPaths) {
+ for (Path path : unknownPaths) {
// this path is intermediate data
- if (p.startsWith(hdfsTmpDir.toString()) || p.startsWith(localTmpDir.toString())) {
- Path path = new Path(p);
+ if (FileUtils.isPathWithinSubtree(path, hdfsTmpDir) || FileUtils.isPathWithinSubtree(path, localTmpDir)) {
FileSystem fs = path.getFileSystem(conf);
long fileSize = fs.getContentSummary(path).getLength();
- for (String alias : pathToAliases.get(p)) {
+ for (String alias : pathToAliases.get(path)) {
Long length = aliasToKnownSize.get(alias);
if (length == null) {
aliasToKnownSize.put(alias, fileSize);
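
Swapping the String startsWith test for FileUtils.isPathWithinSubtree (added to FileUtils in this patch) is a correctness fix, not just a cleanup: a raw prefix check can misclassify a sibling directory whose name merely extends the prefix. A small sketch with made-up paths:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.FileUtils;

    Path tmpDir = new Path("/tmp/hive");
    Path sibling = new Path("/tmp/hive-staging/job1");

    // String prefix: true, although /tmp/hive-staging is not under /tmp/hive.
    boolean byPrefix = sibling.toString().startsWith(tmpDir.toString());

    // Ancestor walk, as isPathWithinSubtree does: false, because no parent
    // of /tmp/hive-staging/job1 equals /tmp/hive.
    boolean bySubtree = FileUtils.isPathWithinSubtree(sibling, tmpDir);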
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index 3f07ea7..68b0ad9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -238,19 +238,19 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
FileStatus[] status = HiveStatsUtils.getFileStatusRecurse(dirPath, dpLbLevel, inpFs);
// cleanup pathToPartitionInfo
- Map<String, PartitionDesc> ptpi = work.getPathToPartitionInfo();
+ Map<Path, PartitionDesc> ptpi = work.getPathToPartitionInfo();
assert ptpi.size() == 1;
- String path = ptpi.keySet().iterator().next();
+ Path path = ptpi.keySet().iterator().next();
PartitionDesc partDesc = ptpi.get(path);
TableDesc tblDesc = partDesc.getTableDesc();
- ptpi.remove(path); // the root path is not useful anymore
+ work.removePathToPartitionInfo(path); // the root path is not useful anymore
// cleanup pathToAliases
- Map<String, ArrayList<String>> pta = work.getPathToAliases();
+ LinkedHashMap<Path, ArrayList<String>> pta = work.getPathToAliases();
assert pta.size() == 1;
path = pta.keySet().iterator().next();
ArrayList<String> aliases = pta.get(path);
- pta.remove(path); // the root path is not useful anymore
+ work.removePathToAlias(path); // the root path is not useful anymore
// populate pathToPartitionInfo and pathToAliases w/ DP paths
long totalSz = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
index 8d329d9..156e3bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
@@ -74,9 +74,9 @@ public class MapWork extends BaseWork {
// use LinkedHashMap to make sure the iteration order is
// deterministic, to ease testing
- private LinkedHashMap<String, ArrayList<String>> pathToAliases = new LinkedHashMap<String, ArrayList<String>>();
+ private LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
- private LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<String, PartitionDesc>();
+ private LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
private LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
@@ -153,16 +153,47 @@ public class MapWork extends BaseWork {
super(name);
}
+
+ // HIVE-12244: this @Explain should be on the new method; but it changes the explain result
+ // HIVE-12244: example test which can be used to validate change: -Dtest=TestMiniLlapCliDriver -Dqfile=dynamic_partition_pruning.q
@Explain(displayName = "Path -> Alias", explainLevels = { Level.EXTENDED })
- public LinkedHashMap<String, ArrayList<String>> getPathToAliases() {
+ @Deprecated
+ public LinkedHashMap<String, ArrayList<String>> getPathToAliasesOld() {
+ LinkedHashMap<String, ArrayList<String>> ret = new LinkedHashMap<>();
+ for (Entry<Path, ArrayList<String>> p2a : pathToAliases.entrySet()) {
+ ret.put(p2a.getKey().toString(), p2a.getValue());
+ }
+ return ret;
+ }
+
+ // @Explain(displayName = "Path -> Alias", explainLevels = { Level.EXTENDED })
+ public LinkedHashMap<Path, ArrayList<String>> getPathToAliases() {
return pathToAliases;
}
- public void setPathToAliases(
- final LinkedHashMap<String, ArrayList<String>> pathToAliases) {
+ public void setPathToAliases(final LinkedHashMap<Path, ArrayList<String>> pathToAliases) {
this.pathToAliases = pathToAliases;
}
+ public void addPathToAlias(Path path, ArrayList<String> aliases) {
+ pathToAliases.put(path, aliases);
+ }
+
+ public void addPathToAlias(Path path, String newAlias) {
+ ArrayList<String> aliases = pathToAliases.get(path);
+ if (aliases == null) {
+ aliases = new ArrayList<String>();
+ pathToAliases.put(path, aliases);
+ }
+ aliases.add(newAlias);
+ }
+
+ public void removePathToAlias(Path path) {
+ pathToAliases.remove(path);
+ }
+
/**
* This is used to display and verify output of "Path -> Alias" in test framework.
*
@@ -178,27 +209,49 @@ public class MapWork extends BaseWork {
public Map<String, ArrayList<String>> getTruncatedPathToAliases() {
Map<String, ArrayList<String>> trunPathToAliases = new LinkedHashMap<String,
ArrayList<String>>();
- Iterator<Entry<String, ArrayList<String>>> itr = this.pathToAliases.entrySet().iterator();
+ Iterator<Entry<Path, ArrayList<String>>> itr = this.pathToAliases.entrySet().iterator();
while (itr.hasNext()) {
- final Entry<String, ArrayList<String>> entry = itr.next();
- String origiKey = entry.getKey();
- String newKey = PlanUtils.removePrefixFromWarehouseConfig(origiKey);
+ final Entry<Path, ArrayList<String>> entry = itr.next();
+ Path origiKey = entry.getKey();
+ String newKey = PlanUtils.removePrefixFromWarehouseConfig(origiKey.toString());
ArrayList<String> value = entry.getValue();
trunPathToAliases.put(newKey, value);
}
return trunPathToAliases;
}
+ // HIVE-12244: this @Explain should be on the new method; but it changes the explain result
+ // HIVE-12244: example test which can be used to validate change: combine2.q
@Explain(displayName = "Path -> Partition", explainLevels = { Level.EXTENDED })
- public LinkedHashMap<String, PartitionDesc> getPathToPartitionInfo() {
+ @Deprecated
+ public LinkedHashMap<String, PartitionDesc> getPathToPartitionInfoOld() {
+ LinkedHashMap<String, PartitionDesc> ret = new LinkedHashMap<>();
+ for (Entry<Path, PartitionDesc> p2a : pathToPartitionInfo.entrySet()) {
+ ret.put(p2a.getKey().toString(), p2a.getValue());
+ }
+ return ret;
+ }
+
+ //@Explain(displayName = "Path -> Partition", explainLevels = { Level.EXTENDED })
+ public LinkedHashMap<Path, PartitionDesc> getPathToPartitionInfo() {
return pathToPartitionInfo;
}
- public void setPathToPartitionInfo(
- final LinkedHashMap<String, PartitionDesc> pathToPartitionInfo) {
+ public void setPathToPartitionInfo(final LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo) {
this.pathToPartitionInfo = pathToPartitionInfo;
}
+ public void addPathToPartitionInfo(Path path, PartitionDesc partitionInfo) {
+ if (pathToPartitionInfo == null) {
+ pathToPartitionInfo = new LinkedHashMap<>();
+ }
+ pathToPartitionInfo.put(path, partitionInfo);
+ }
+
+ public void removePathToPartitionInfo(Path path) {
+ pathToPartitionInfo.remove(path);
+ }
+
/**
* Derive additional attributes to be rendered by EXPLAIN.
* TODO: this method is relied upon by custom input formats to set jobconf properties.
@@ -206,7 +259,7 @@ public class MapWork extends BaseWork {
*/
public void deriveExplainAttributes() {
if (pathToPartitionInfo != null) {
- for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
+ for (Map.Entry<Path, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
entry.getValue().deriveBaseFileName(entry.getKey());
}
}
@@ -347,7 +400,7 @@ public class MapWork extends BaseWork {
}
@SuppressWarnings("nls")
- public void addMapWork(String path, String alias, Operator<?> work,
+ public void addMapWork(Path path, String alias, Operator<?> work,
PartitionDesc pd) {
ArrayList<String> curAliases = pathToAliases.get(path);
if (curAliases == null) {
@@ -383,8 +436,8 @@ public class MapWork extends BaseWork {
public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path,
TableDesc tblDesc, ArrayList<String> aliases, PartitionDesc partDesc) {
- pathToAliases.put(path.toString(), aliases);
- pathToPartitionInfo.put(path.toString(), partDesc);
+ pathToAliases.put(path, aliases);
+ pathToPartitionInfo.put(path, partDesc);
}
/**
@@ -446,7 +499,7 @@ public class MapWork extends BaseWork {
return aliasToWork.isEmpty() ? null : aliasToWork.values().iterator().next();
}
- public void mergeAliasedInput(String alias, String pathDir, PartitionDesc partitionInfo) {
+ public void mergeAliasedInput(String alias, Path pathDir, PartitionDesc partitionInfo) {
ArrayList<String> aliases = pathToAliases.get(pathDir);
if (aliases == null) {
aliases = new ArrayList<String>(Arrays.asList(alias));
@@ -529,10 +582,13 @@ public class MapWork extends BaseWork {
return new ArrayList<Operator<?>>(aliasToWork.values());
}
- public ArrayList<String> getPaths() {
- return new ArrayList<String>(pathToAliases.keySet());
+ public ArrayList<Path> getPaths() {
+ ArrayList<Path> ret = new ArrayList<>();
+ ret.addAll(pathToAliases.keySet());
+ return ret;
}
+
public ArrayList<PartitionDesc> getPartitionDescs() {
return new ArrayList<PartitionDesc>(aliasToPartnInfo.values());
}
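
Taken together, the new MapWork methods give call sites a small Path-keyed API in place of raw map manipulation. A minimal usage sketch; the table name, alias, and the tableDesc variable are placeholders, not part of the patch:

    MapWork work = new MapWork();
    Path dir = new Path("hdfs://nn/warehouse/t1");

    work.addPathToAlias(dir, "t1"); // creates the alias list on first use
    work.addPathToPartitionInfo(dir, new PartitionDesc(tableDesc, null));

    // Later, when this input is rewritten or dropped:
    work.removePathToAlias(dir);
    work.removePathToPartitionInfo(dir);

The deprecated getPathToAliasesOld()/getPathToPartitionInfoOld() wrappers exist only so that the String-keyed "Path -> Alias" and "Path -> Partition" EXPLAIN EXTENDED output stays stable, as the HIVE-12244 comments above note.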
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index fe09bdf..921461f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -305,20 +305,13 @@ public class PartitionDesc implements Serializable, Cloneable {
* @param path
* URI to the partition file
*/
- public void deriveBaseFileName(String path) {
+ public void deriveBaseFileName(Path path) {
PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
if (path == null) {
return;
}
- try {
- Path p = new Path(path);
- baseFileName = p.getName();
- } catch (Exception ex) {
- // don't really care about the exception. the goal is to capture the
- // the last component at the minimum - so set to the complete path
- baseFileName = path;
- }
+ baseFileName = path.getName();
}
public void intern(Interner<TableDesc> interner) {
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
index 71dfc50..df4129e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
@@ -162,7 +162,7 @@ public class TestExecDriver extends TestCase {
}
public static void addMapWork(MapredWork mr, Table tbl, String alias, Operator<?> work) {
- mr.getMapWork().addMapWork(tbl.getDataLocation().toString(), alias, work, new PartitionDesc(
+ mr.getMapWork().addMapWork(tbl.getDataLocation(), alias, work, new PartitionDesc(
Utilities.getTableDesc(tbl), null));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
index 2ccb05a..532d478 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
@@ -295,17 +295,16 @@ public class TestOperators extends TestCase {
ArrayList<String> aliases = new ArrayList<String>();
aliases.add("a");
aliases.add("b");
- LinkedHashMap<String, ArrayList<String>> pathToAliases =
- new LinkedHashMap<String, ArrayList<String>>();
- pathToAliases.put("hdfs:///testDir", aliases);
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
+ pathToAliases.put(new Path("hdfs:///testDir"), aliases);
// initialize pathToTableInfo
// Default: treat the table as a single column "col"
TableDesc td = Utilities.defaultTd;
PartitionDesc pd = new PartitionDesc(td, null);
- LinkedHashMap<String, org.apache.hadoop.hive.ql.plan.PartitionDesc> pathToPartitionInfo =
- new LinkedHashMap<String, org.apache.hadoop.hive.ql.plan.PartitionDesc>();
- pathToPartitionInfo.put("hdfs:///testDir", pd);
+ LinkedHashMap<Path, org.apache.hadoop.hive.ql.plan.PartitionDesc> pathToPartitionInfo =
+ new LinkedHashMap<>();
+ pathToPartitionInfo.put(new Path("hdfs:///testDir"), pd);
// initialize aliasToWork
CompilationOpContext ctx = new CompilationOpContext();
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
index 06e5e07..0b52e48 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestPlan.java
@@ -64,13 +64,13 @@ public class TestPlan extends TestCase {
ArrayList<String> aliasList = new ArrayList<String>();
aliasList.add("a");
- LinkedHashMap<String, ArrayList<String>> pa = new LinkedHashMap<String, ArrayList<String>>();
- pa.put("/tmp/testfolder", aliasList);
+ LinkedHashMap<Path, ArrayList<String>> pa = new LinkedHashMap<>();
+ pa.put(new Path("/tmp/testfolder"), aliasList);
TableDesc tblDesc = Utilities.defaultTd;
PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
- LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
- pt.put("/tmp/testfolder", partDesc);
+ LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
+ pt.put(new Path("/tmp/testfolder"), partDesc);
LinkedHashMap<String, Operator<? extends OperatorDesc>> ao =
new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
index 69f5465..7150424 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
@@ -290,8 +290,8 @@ public class TestUtilities {
MapWork mapWork2 = new MapWork();
JobConf jobConf = new JobConf();
- String nonExistentPath1 = UUID.randomUUID().toString();
- String nonExistentPath2 = UUID.randomUUID().toString();
+ Path nonExistentPath1 = new Path(UUID.randomUUID().toString());
+ Path nonExistentPath2 = new Path(UUID.randomUUID().toString());
PartitionDesc mockPartitionDesc = mock(PartitionDesc.class);
TableDesc mockTableDesc = mock(TableDesc.class);
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
index dc39cbd..940d78d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java
@@ -138,11 +138,10 @@ public class TestTezTask {
mws[0].setAliasToWork(map);
mws[1].setAliasToWork(map);
- LinkedHashMap<String, ArrayList<String>> pathMap
- = new LinkedHashMap<String, ArrayList<String>>();
+ LinkedHashMap<Path, ArrayList<String>> pathMap = new LinkedHashMap<>();
ArrayList<String> aliasList = new ArrayList<String>();
aliasList.add("foo");
- pathMap.put("foo", aliasList);
+ pathMap.put(new Path("foo"), aliasList);
mws[0].setPathToAliases(pathMap);
mws[1].setPathToAliases(pathMap);
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/io/TestCombineHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestCombineHiveInputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestCombineHiveInputFormat.java
index 9b8a519..4459089 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestCombineHiveInputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestCombineHiveInputFormat.java
@@ -47,9 +47,9 @@ public class TestCombineHiveInputFormat extends TestCase {
TableDesc tblDesc = Utilities.defaultTd;
tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
- LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
- pt.put("/tmp/testfolder1", partDesc);
- pt.put("/tmp/testfolder2", partDesc);
+ LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
+ pt.put(new Path("/tmp/testfolder1"), partDesc);
+ pt.put(new Path("/tmp/testfolder2"), partDesc);
MapredWork mrwork = new MapredWork();
mrwork.getMapWork().setPathToPartitionInfo(pt);
Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
index 9dc4f5b..6865c2b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
@@ -137,8 +137,8 @@ public class TestHiveBinarySearchRecordReader extends TestCase {
TableDesc tblDesc = Utilities.defaultTd;
PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
- LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
- pt.put("/tmp/testfolder", partDesc);
+ LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
+ pt.put(new Path("/tmp/testfolder"), partDesc);
MapredWork mrwork = new MapredWork();
mrwork.getMapWork().setPathToPartitionInfo(pt);
Utilities.setMapRedWork(conf, mrwork,new Path("/tmp/" + System.getProperty("user.name"), "hive"));
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
index efc18ee..dae85a6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveFileFormatUtils.java
@@ -36,16 +36,12 @@ public class TestHiveFileFormatUtils extends TestCase {
PartitionDesc partDesc_5 = new PartitionDesc();
PartitionDesc partDesc_6 = new PartitionDesc();
- Map<String, PartitionDesc> pathToPartitionInfo = new HashMap<String, PartitionDesc>();
-
- pathToPartitionInfo.put(
- new Path("file:///tbl/par1/part2/part3").toString(), partDesc_3);
- pathToPartitionInfo.put(new Path("/tbl/par1/part2/part4").toString(),
- partDesc_4);
- pathToPartitionInfo.put(new Path("/tbl/par1/part2/part5/").toString(),
- partDesc_5);
- pathToPartitionInfo.put(new Path("hdfs:///tbl/par1/part2/part6/")
- .toString(), partDesc_6);
+ Map<Path, PartitionDesc> pathToPartitionInfo = new HashMap<>();
+
+ pathToPartitionInfo.put(new Path("file:///tbl/par1/part2/part3"), partDesc_3);
+ pathToPartitionInfo.put(new Path("/tbl/par1/part2/part4"), partDesc_4);
+ pathToPartitionInfo.put(new Path("/tbl/par1/part2/part5/"), partDesc_5);
+ pathToPartitionInfo.put(new Path("hdfs:///tbl/par1/part2/part6/"), partDesc_6);
// first group
PartitionDesc ret = null;
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
index 08b8c32..10f43bb 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestSymlinkTextInputFormat.java
@@ -77,8 +77,8 @@ public class TestSymlinkTextInputFormat extends TestCase {
TableDesc tblDesc = Utilities.defaultTd;
PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
- LinkedHashMap<String, PartitionDesc> pt = new LinkedHashMap<String, PartitionDesc>();
- pt.put("/tmp/testfolder", partDesc);
+ LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
+ pt.put(new Path("/tmp/testfolder"), partDesc);
MapredWork mrwork = new MapredWork();
mrwork.getMapWork().setPathToPartitionInfo(pt);
Utilities.setMapRedWork(job, mrwork,new Path("/tmp/" + System.getProperty("user.name"), "hive"));
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 3c89dd6..a001f8e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1940,14 +1940,13 @@ public class TestInputOutputFormat {
mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
}
mapWork.setUseBucketizedHiveInputFormat(false);
- LinkedHashMap<String, ArrayList<String>> aliasMap =
- new LinkedHashMap<String, ArrayList<String>>();
+ LinkedHashMap<Path, ArrayList<String>> aliasMap = new LinkedHashMap<>();
ArrayList<String> aliases = new ArrayList<String>();
aliases.add(tableName);
- LinkedHashMap<String, PartitionDesc> partMap =
- new LinkedHashMap<String, PartitionDesc>();
+ LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
for(int p=0; p < partitions; ++p) {
- aliasMap.put(partPath[p], aliases);
+ Path path = new Path(partPath[p]);
+ aliasMap.put(path, aliases);
LinkedHashMap<String, String> partSpec =
new LinkedHashMap<String, String>();
PartitionDesc part = new PartitionDesc(tbl, partSpec);
@@ -1955,7 +1954,7 @@ public class TestInputOutputFormat {
part.setVectorPartitionDesc(
VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false));
}
- partMap.put(partPath[p], part);
+ partMap.put(path, part);
}
mapWork.setPathToAliases(aliasMap);
mapWork.setPathToPartitionInfo(partMap);
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
index ef846a6..229c328 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestConditionalResolverCommonJoin.java
@@ -19,6 +19,8 @@
package org.apache.hadoop.hive.ql.plan;
import junit.framework.Assert;
+
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.DDLTask;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -38,9 +40,9 @@ public class TestConditionalResolverCommonJoin {
public void testResolvingDriverAlias() throws Exception {
ConditionalResolverCommonJoin resolver = new ConditionalResolverCommonJoin();
- HashMap<String, ArrayList<String>> pathToAliases = new HashMap<String, ArrayList<String>>();
- pathToAliases.put("path1", new ArrayList<String>(Arrays.asList("alias1", "alias2")));
- pathToAliases.put("path2", new ArrayList<String>(Arrays.asList("alias3")));
+ HashMap<Path, ArrayList<String>> pathToAliases = new HashMap<>();
+ pathToAliases.put(new Path("path1"), new ArrayList<String>(Arrays.asList("alias1", "alias2")));
+ pathToAliases.put(new Path("path2"), new ArrayList<String>(Arrays.asList("alias3")));
HashMap<String, Long> aliasToKnownSize = new HashMap<String, Long>();
aliasToKnownSize.put("alias1", 1024l);
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
new file mode 100644
index 0000000..165c028
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestMapWork.java
@@ -0,0 +1,34 @@
+package org.apache.hadoop.hive.ql.plan;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+public class TestMapWork {
+ @Test
+ public void testGetAndSetConsistency() {
+ MapWork mw = new MapWork();
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
+ pathToAliases.put(new Path("p0"), Lists.newArrayList("a1", "a2"));
+ mw.setPathToAliases(pathToAliases);
+
+ LinkedHashMap<Path, ArrayList<String>> pta = mw.getPathToAliases();
+ assertEquals(pathToAliases, pta);
+
+ }
+
+ @Test
+ public void testPath() {
+ Path p1 = new Path("hdfs://asd/asd");
+ Path p2 = new Path("hdfs://asd/asd/");
+
+ assertEquals(p1, p2);
+ }
+
+}
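
The testPath case pins down the property the whole patch relies on: org.apache.hadoop.fs.Path normalizes its input on construction (dropping a trailing slash, among other things), so two spellings of the same directory compare equal even though the raw strings do not:

    String s1 = "hdfs://asd/asd";
    String s2 = "hdfs://asd/asd/";
    s1.equals(s2);                     // false: the raw strings differ
    new Path(s1).equals(new Path(s2)); // true: Path normalizes on construction

With String keys, the two spellings could silently occupy different map entries; with Path keys they cannot.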
[2/2] hive git commit: HIVE-12244: Refactor code to avoid comparing Strings and compare Paths instead (Zoltan Haindrich via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-12244: Refactor code to avoid comparing Strings and compare Paths instead (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/47b5b5cd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/47b5b5cd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/47b5b5cd
Branch: refs/heads/master
Commit: 47b5b5cdb8702c9ddbca3911b6a03ca59982b3c1
Parents: 70a9722
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Sat Jul 16 11:41:56 2016 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Jul 16 11:41:56 2016 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/common/FileUtils.java | 62 ++++++++++
.../hive/llap/io/api/impl/LlapInputFormat.java | 2 +-
.../hive/ql/exec/AbstractMapOperator.java | 12 +-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 5 +-
.../apache/hadoop/hive/ql/exec/MapOperator.java | 12 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 50 ++++----
.../hadoop/hive/ql/exec/mr/ExecDriver.java | 8 +-
.../exec/spark/SparkDynamicPartitionPruner.java | 8 +-
.../hadoop/hive/ql/exec/tez/DagUtils.java | 8 +-
.../ql/exec/tez/DynamicPartitionPruner.java | 10 +-
.../hadoop/hive/ql/exec/tez/SplitGrouper.java | 2 +-
.../hive/ql/exec/vector/VectorMapOperator.java | 6 +-
.../ql/exec/vector/VectorizedRowBatchCtx.java | 3 +-
.../hive/ql/index/HiveIndexedInputFormat.java | 4 -
.../hive/ql/io/BucketizedHiveInputFormat.java | 3 +-
.../hive/ql/io/CombineHiveInputFormat.java | 27 ++---
.../ql/io/HiveContextAwareRecordReader.java | 2 +-
.../hadoop/hive/ql/io/HiveFileFormatUtils.java | 116 ++++++-------------
.../hadoop/hive/ql/io/HiveInputFormat.java | 58 +++-------
.../hadoop/hive/ql/io/IOPrepareCache.java | 11 +-
.../hadoop/hive/ql/io/SymbolicInputFormat.java | 34 +++---
.../ql/io/avro/AvroGenericRecordReader.java | 10 +-
.../hadoop/hive/ql/io/merge/MergeFileWork.java | 5 +-
.../hive/ql/io/parquet/ProjectionPusher.java | 21 ++--
.../ql/io/rcfile/stats/PartialScanWork.java | 5 +-
.../io/rcfile/truncate/ColumnTruncateWork.java | 5 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 71 +++++-------
.../hive/ql/optimizer/MapJoinProcessor.java | 20 ++--
.../physical/AbstractJoinTaskDispatcher.java | 7 +-
.../physical/CommonJoinTaskDispatcher.java | 14 +--
.../physical/GenMRSkewJoinProcessor.java | 4 +-
.../physical/GenSparkSkewJoinProcessor.java | 4 +-
.../hive/ql/optimizer/physical/LlapDecider.java | 3 +-
.../physical/NullScanTaskDispatcher.java | 17 ++-
.../physical/SortMergeJoinTaskDispatcher.java | 16 +--
.../hive/ql/optimizer/physical/Vectorizer.java | 9 +-
.../spark/SparkSkewJoinProcFactory.java | 2 +-
.../ql/plan/ConditionalResolverCommonJoin.java | 23 ++--
.../ql/plan/ConditionalResolverMergeFiles.java | 10 +-
.../org/apache/hadoop/hive/ql/plan/MapWork.java | 94 ++++++++++++---
.../hadoop/hive/ql/plan/PartitionDesc.java | 11 +-
.../hadoop/hive/ql/exec/TestExecDriver.java | 2 +-
.../hadoop/hive/ql/exec/TestOperators.java | 11 +-
.../apache/hadoop/hive/ql/exec/TestPlan.java | 8 +-
.../hadoop/hive/ql/exec/TestUtilities.java | 4 +-
.../hadoop/hive/ql/exec/tez/TestTezTask.java | 5 +-
.../hive/ql/io/TestCombineHiveInputFormat.java | 6 +-
.../ql/io/TestHiveBinarySearchRecordReader.java | 4 +-
.../hive/ql/io/TestHiveFileFormatUtils.java | 16 +--
.../hive/ql/io/TestSymlinkTextInputFormat.java | 4 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 11 +-
.../plan/TestConditionalResolverCommonJoin.java | 8 +-
.../apache/hadoop/hive/ql/plan/TestMapWork.java | 34 ++++++
53 files changed, 473 insertions(+), 434 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index d755798..26ce26f 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -26,8 +26,10 @@ import java.net.URISyntaxException;
import java.security.AccessControlException;
import java.security.PrivilegedExceptionAction;
import java.util.BitSet;
+import java.util.Collection;
import java.util.List;
import java.util.Random;
+import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -849,4 +851,64 @@ public final class FileUtils {
}
return false;
}
+
+
+ /**
+ * Returns whether all paths in the collection are scheme-less.
+ *
+ * @param paths the paths to check
+ * @return true if none of the paths carries a URI scheme
+ */
+ public static boolean pathsContainNoScheme(Collection<Path> paths) {
+ for (Path path : paths) {
+ if (path.toUri().getScheme() != null) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Returns the deepest ancestor of the given path (including the path itself)
+ * that appears among the candidates.
+ *
+ * At each level, a scheme-qualified match takes priority over a match on the
+ * scheme-less form of the path.
+ *
+ * @param path the path whose ancestors are searched
+ * @param candidates the candidate paths
+ * @return the matching candidate, or null if none matches
+ */
+ public static Path getParentRegardlessOfScheme(Path path, Set<Path> candidates) {
+ Path schemalessPath = Path.getPathWithoutSchemeAndAuthority(path);
+
+ for (; path != null && schemalessPath != null;
+ path = path.getParent(), schemalessPath = schemalessPath.getParent()) {
+ if (candidates.contains(path)) {
+ return path;
+ }
+ if (candidates.contains(schemalessPath)) {
+ return schemalessPath;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Checks whether the path lies inside the given subtree.
+ *
+ * Returns true iff the path equals the subtree root or is a descendant of it.
+ *
+ * @param path the path in question
+ * @param subtree the root of the subtree
+ * @return true if path is the subtree root or one of its descendants
+ */
+ public static boolean isPathWithinSubtree(Path path, Path subtree) {
+ while (path != null) {
+ if (subtree.equals(path)) {
+ return true;
+ }
+ path = path.getParent();
+ }
+ return false;
+ }
+
}
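
The new FileUtils helpers centralize scheme-tolerant path matching that was previously done with ad-hoc string handling. A sketch of getParentRegardlessOfScheme with placeholder paths: at each ancestor level it tries the scheme-qualified form first and then the scheme-less form, so a candidate registered without a scheme still matches a fully qualified input:

    import java.util.HashSet;
    import java.util.Set;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.FileUtils;

    Set<Path> candidates = new HashSet<>();
    candidates.add(new Path("/warehouse/t1")); // registered without a scheme

    Path input = new Path("hdfs://nn:8020/warehouse/t1/part-00000");

    // Walks up from the input: hdfs://nn:8020/warehouse/t1 is not among the
    // candidates, but its scheme-less form /warehouse/t1 is, so that Path
    // is returned.
    Path match = FileUtils.getParentRegardlessOfScheme(input, candidates);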
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index c5d0680..9d16889 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -428,7 +428,7 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
// Determine the partition columns using the first partition descriptor.
// Note - like vectorizer, this assumes partition columns go after data columns.
int partitionColumnCount = 0;
- Iterator<String> paths = mapWork.getPathToAliases().keySet().iterator();
+ Iterator<Path> paths = mapWork.getPathToAliases().keySet().iterator();
if (paths.hasNext()) {
PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next());
if (partDesc != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java
index 5c3012b..6de2c18 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapOperator.java
@@ -80,14 +80,14 @@ public abstract class AbstractMapOperator extends Operator<MapWork>
private final Map<Integer, DummyStoreOperator> connectedOperators
= new TreeMap<Integer, DummyStoreOperator>();
- private transient final Map<String, Path> normalizedPaths = new HashMap<String, Path>();
+ private transient final Map<Path, Path> normalizedPaths = new HashMap<>();
- private Path normalizePath(String onefile, boolean schemaless) {
+ private Path normalizePath(Path onefile, boolean schemaless) {
//creating Path is expensive, so cache the corresponding
//Path object in normalizedPaths
Path path = normalizedPaths.get(onefile);
if (path == null) {
- path = new Path(onefile);
+ path = onefile;
if (schemaless && path.toUri().getScheme() != null) {
path = new Path(path.toUri().getPath());
}
@@ -97,9 +97,9 @@ public abstract class AbstractMapOperator extends Operator<MapWork>
}
protected String getNominalPath(Path fpath) {
- String nominal = null;
+ Path nominal = null;
boolean schemaless = fpath.toUri().getScheme() == null;
- for (String onefile : conf.getPathToAliases().keySet()) {
+ for (Path onefile : conf.getPathToAliases().keySet()) {
Path onepath = normalizePath(onefile, schemaless);
Path curfpath = fpath;
if(!schemaless && onepath.toUri().getScheme() == null) {
@@ -118,7 +118,7 @@ public abstract class AbstractMapOperator extends Operator<MapWork>
if (nominal == null) {
throw new IllegalStateException("Invalid input path " + fpath);
}
- return nominal;
+ return nominal.toString();
}
public abstract void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index bb43950..2b8d6a7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -640,11 +640,10 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
// merge work only needs input and output.
MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(),
mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName());
- LinkedHashMap<String, ArrayList<String>> pathToAliases =
- new LinkedHashMap<String, ArrayList<String>>();
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
ArrayList<String> inputDirstr = new ArrayList<String>(1);
inputDirstr.add(mergeFilesDesc.getInputDir().toString());
- pathToAliases.put(mergeFilesDesc.getInputDir().get(0).toString(), inputDirstr);
+ pathToAliases.put(mergeFilesDesc.getInputDir().get(0), inputDirstr);
mergeWork.setPathToAliases(pathToAliases);
mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
mergeWork.resolveConcatenateMerge(db.getConf());
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index fabfbc4..f3eed75 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.exec;
-import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -29,7 +28,6 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
-import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@@ -289,7 +287,7 @@ public class MapOperator extends AbstractMapOperator {
try {
Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
- for (String onefile : conf.getPathToAliases().keySet()) {
+ for (Path onefile : conf.getPathToAliases().keySet()) {
PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
TableDesc tableDesc = pd.getTableDesc();
Deserializer partDeserializer = pd.getDeserializer(hconf);
@@ -363,8 +361,8 @@ public class MapOperator extends AbstractMapOperator {
Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(hconf);
- for (Map.Entry<String, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
- String onefile = entry.getKey();
+ for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
+ Path onefile = entry.getKey();
List<String> aliases = entry.getValue();
PartitionDesc partDesc = conf.getPathToPartitionInfo().get(onefile);
@@ -374,9 +372,9 @@ public class MapOperator extends AbstractMapOperator {
LOG.debug("Adding alias " + alias + " to work list for file "
+ onefile);
}
- Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(onefile);
+ Map<Operator<?>, MapOpCtx> contexts = opCtxMap.get(onefile.toString());
if (contexts == null) {
- opCtxMap.put(onefile, contexts = new LinkedHashMap<Operator<?>, MapOpCtx>());
+ opCtxMap.put(onefile.toString(), contexts = new LinkedHashMap<Operator<?>, MapOpCtx>());
}
if (contexts.containsKey(op)) {
continue;
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 973bc23..bc977a0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2102,14 +2102,14 @@ public final class Utilities {
long[] summary = {0, 0, 0};
- final List<String> pathNeedProcess = new ArrayList<String>();
+ final List<Path> pathNeedProcess = new ArrayList<>();
// Since multiple threads could call this method concurrently, locking
// this method will avoid number of threads out of control.
synchronized (INPUT_SUMMARY_LOCK) {
// For each input path, calculate the total size.
- for (String path : work.getPathToAliases().keySet()) {
- Path p = new Path(path);
+ for (Path path : work.getPathToAliases().keySet()) {
+ Path p = path;
if (filter != null && !filter.accept(p)) {
continue;
@@ -2145,9 +2145,9 @@ public final class Utilities {
HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {
@Override
public void interrupt() {
- for (String path : pathNeedProcess) {
+ for (Path path : pathNeedProcess) {
try {
- new Path(path).getFileSystem(ctx.getConf()).close();
+ path.getFileSystem(ctx.getConf()).close();
} catch (IOException ignore) {
LOG.debug("Failed to close filesystem", ignore);
}
@@ -2160,9 +2160,9 @@ public final class Utilities {
try {
Configuration conf = ctx.getConf();
JobConf jobConf = new JobConf(conf);
- for (String path : pathNeedProcess) {
- final Path p = new Path(path);
- final String pathStr = path;
+ for (Path path : pathNeedProcess) {
+ final Path p = path;
+ final String pathStr = path.toString();
// All threads share the same Configuration and JobConf based on the
// assumption that they are thread safe if only read operations are
// executed. It is not stated in Hadoop's javadoc, the sourcce codes
@@ -2172,9 +2172,8 @@ public final class Utilities {
final Configuration myConf = conf;
final JobConf myJobConf = jobConf;
final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
- final Map<String, ArrayList<String>> pathToAlias = work.getPathToAliases();
- final PartitionDesc partDesc = work.getPathToPartitionInfo().get(
- p.toString());
+ final Map<Path, ArrayList<String>> pathToAlias = work.getPathToAliases();
+ final PartitionDesc partDesc = work.getPathToPartitionInfo().get(p);
Runnable r = new Runnable() {
@Override
public void run() {
@@ -2991,10 +2990,10 @@ public final class Utilities {
// The alias may not have any path
Path path = null;
- for (String file : new LinkedList<String>(work.getPathToAliases().keySet())) {
+ for (Path file : new LinkedList<Path>(work.getPathToAliases().keySet())) {
List<String> aliases = work.getPathToAliases().get(file);
if (aliases.contains(alias)) {
- path = new Path(file);
+ path = file;
// Multiple aliases can point to the same path - it should be
// processed only once
@@ -3070,7 +3069,7 @@ public final class Utilities {
String strPath = path.toString();
// The input file does not exist, replace it with an empty file
- PartitionDesc partDesc = work.getPathToPartitionInfo().get(strPath);
+ PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
if (partDesc.getTableDesc().isNonNative()) {
// if this isn't a hive table we can't create an empty file for it.
return path;
@@ -3092,16 +3091,11 @@ public final class Utilities {
// update the work
String strNewPath = newPath.toString();
- LinkedHashMap<String, ArrayList<String>> pathToAliases = work.getPathToAliases();
- pathToAliases.put(strNewPath, pathToAliases.get(strPath));
- pathToAliases.remove(strPath);
+ work.addPathToAlias(newPath, work.getPathToAliases().get(path));
+ work.removePathToAlias(path);
- work.setPathToAliases(pathToAliases);
-
- LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = work.getPathToPartitionInfo();
- pathToPartitionInfo.put(strNewPath, pathToPartitionInfo.get(strPath));
- pathToPartitionInfo.remove(strPath);
- work.setPathToPartitionInfo(pathToPartitionInfo);
+ work.removePathToPartitionInfo(path);
+ work.addPathToPartitionInfo(newPath, partDesc);
return newPath;
}
@@ -3129,17 +3123,15 @@ public final class Utilities {
// update the work
- LinkedHashMap<String, ArrayList<String>> pathToAliases = work.getPathToAliases();
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases();
ArrayList<String> newList = new ArrayList<String>();
newList.add(alias);
- pathToAliases.put(newPath.toUri().toString(), newList);
+ pathToAliases.put(newPath, newList);
work.setPathToAliases(pathToAliases);
- LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = work.getPathToPartitionInfo();
PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone();
- pathToPartitionInfo.put(newPath.toUri().toString(), pDesc);
- work.setPathToPartitionInfo(pathToPartitionInfo);
+ work.addPathToPartitionInfo(newPath, pDesc);
return newPath;
}
@@ -3197,7 +3189,7 @@ public final class Utilities {
public static void createTmpDirs(Configuration conf, MapWork mWork)
throws IOException {
- Map<String, ArrayList<String>> pa = mWork.getPathToAliases();
+ Map<Path, ArrayList<String>> pa = mWork.getPathToAliases();
if (pa != null) {
// common case: 1 table scan per map-work
// rare case: smb joins
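The manual put/remove/setPathToAliases bookkeeping above is replaced
throughout this commit by small MapWork helpers. Their real implementations
live in the MapWork.java hunk (not shown in this excerpt); judging only from
the call sites, they behave roughly like this reconstruction (an overload of
addPathToAlias also accepts the full ArrayList of aliases):

    // Reconstructed from call sites; the authoritative versions are in
    // MapWork.java elsewhere in this commit.
    public void addPathToAlias(Path path, String newAlias) {
      ArrayList<String> aliases = pathToAliases.get(path);
      if (aliases == null) {
        aliases = new ArrayList<>();
        pathToAliases.put(path, aliases);
      }
      aliases.add(newAlias);
    }

    public void removePathToAlias(Path path) {
      pathToAliases.remove(path);
    }

    public void addPathToPartitionInfo(Path path, PartitionDesc pDesc) {
      pathToPartitionInfo.put(path, pDesc);
    }

    public void removePathToPartitionInfo(Path path) {
      pathToPartitionInfo.remove(path);
    }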
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 56f16cf..8783960 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -491,13 +491,9 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
Operator<?> topOp = mWork.getAliasToWork().get(alias);
PartitionDesc partDesc = mWork.getAliasToPartnInfo().get(alias);
- ArrayList<String> paths = mWork.getPaths();
ArrayList<PartitionDesc> parts = mWork.getPartitionDescs();
- List<Path> inputPaths = new ArrayList<Path>(paths.size());
- for (String path : paths) {
- inputPaths.add(new Path(path));
- }
+ List<Path> inputPaths = mWork.getPaths();
Path tmpPath = context.getExternalTmpPath(inputPaths.get(0));
Path partitionFile = new Path(tmpPath, ".partitions");
@@ -520,7 +516,7 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
FetchWork fetchWork;
if (!partDesc.isPartitioned()) {
- assert paths.size() == 1;
+ assert inputPaths.size() == 1;
fetchWork = new FetchWork(inputPaths.get(0), partDesc.getTableDesc());
} else {
fetchWork = new FetchWork(inputPaths, parts, partDesc.getTableDesc());
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java
index b70be01..eced0cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkDynamicPartitionPruner.java
@@ -197,9 +197,9 @@ public class SparkDynamicPartitionPruner {
Object[] row = new Object[1];
- Iterator<String> it = work.getPathToPartitionInfo().keySet().iterator();
+ Iterator<Path> it = work.getPathToPartitionInfo().keySet().iterator();
while (it.hasNext()) {
- String p = it.next();
+ Path p = it.next();
PartitionDesc desc = work.getPathToPartitionInfo().get(p);
Map<String, String> spec = desc.getPartSpec();
if (spec == null) {
@@ -225,8 +225,8 @@ public class SparkDynamicPartitionPruner {
if (!values.contains(partValue)) {
LOG.info("Pruning path: " + p);
it.remove();
- work.getPathToAliases().remove(p);
- work.getPaths().remove(p);
+ work.removePathToAlias(p);
+ // HIVE-12244 call currently ineffective
work.getPartitionDescs().remove(desc);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index a1e4e6c..60b5c40 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -157,12 +157,12 @@ public class DagUtils {
private final ConcurrentHashMap<String, Object> copyNotifiers = new ConcurrentHashMap<>();
private void addCredentials(MapWork mapWork, DAG dag) {
- Set<String> paths = mapWork.getPathToAliases().keySet();
+ Set<Path> paths = mapWork.getPathToAliases().keySet();
if (!paths.isEmpty()) {
- Iterator<URI> pathIterator = Iterators.transform(paths.iterator(), new Function<String, URI>() {
+ Iterator<URI> pathIterator = Iterators.transform(paths.iterator(), new Function<Path, URI>() {
@Override
- public URI apply(String input) {
- return new Path(input).toUri();
+ public URI apply(Path path) {
+ return path.toUri();
}
});
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
index 9e1bf0b..be24e10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DynamicPartitionPruner.java
@@ -40,6 +40,7 @@ import com.google.common.base.Preconditions;
import org.apache.commons.lang3.mutable.MutableInt;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -271,9 +272,9 @@ public class DynamicPartitionPruner {
Object[] row = new Object[1];
- Iterator<String> it = work.getPathToPartitionInfo().keySet().iterator();
+ Iterator<Path> it = work.getPathToPartitionInfo().keySet().iterator();
while (it.hasNext()) {
- String p = it.next();
+ Path p = it.next();
PartitionDesc desc = work.getPathToPartitionInfo().get(p);
Map<String, String> spec = desc.getPartSpec();
if (spec == null) {
@@ -299,9 +300,8 @@ public class DynamicPartitionPruner {
if (!values.contains(partValue)) {
LOG.info("Pruning path: " + p);
it.remove();
- work.getPathToAliases().remove(p);
- work.getPaths().remove(p);
- work.getPartitionDescs().remove(desc);
+ // work.removePathToPartitionInfo(p);
+ work.removePathToAlias(p);
}
}
}
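Both pruners (the Spark one above and this Tez one) mutate maps while a
keySet iterator is live, which is why pathToPartitionInfo entries go through
it.remove() while the alias map can be modified directly: only the map being
iterated must be changed via its own iterator. A minimal sketch of the
pattern, with a hypothetical shouldPrune predicate standing in for the
partition-value check:

    Iterator<Path> it = work.getPathToPartitionInfo().keySet().iterator();
    while (it.hasNext()) {
      Path p = it.next();
      if (shouldPrune(p)) {          // hypothetical predicate
        it.remove();                 // safe: removal via the live iterator
        work.removePathToAlias(p);   // safe: a different map entirely
        // work.getPathToPartitionInfo().remove(p) here instead would risk
        // a ConcurrentModificationException on the next it.next()
      }
    }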
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
index b678884..5c0c3ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/SplitGrouper.java
@@ -60,7 +60,7 @@ public class SplitGrouper {
// TODO This needs to be looked at. Map of Map to Map... Made concurrent for now since split generation
// can happen in parallel.
- private static final Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cache =
+ private static final Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache =
new ConcurrentHashMap<>();
private final TezMapredSplitsGrouper tezGrouper = new TezMapredSplitsGrouper();
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index 0f55c00..56af05e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -533,8 +533,8 @@ public class VectorMapOperator extends AbstractMapOperator {
HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap =
new HashMap<PartitionDesc, VectorPartitionContext>();
- for (Map.Entry<String, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
- String path = entry.getKey();
+ for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
+ Path path = entry.getKey();
PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
ArrayList<String> aliases = entry.getValue();
@@ -552,7 +552,7 @@ public class VectorMapOperator extends AbstractMapOperator {
vectorPartitionContext = partitionContextMap.get(partDesc);
}
- fileToPartitionContextMap.put(path, vectorPartitionContext);
+ fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
}
// Create list of one.
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index e06349d..3e3844e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
@@ -142,7 +143,7 @@ public class VectorizedRowBatchCtx {
public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration hiveConf,
FileSplit split, Object[] partitionValues) throws IOException {
- Map<String, PartitionDesc> pathToPartitionInfo = Utilities
+ Map<Path, PartitionDesc> pathToPartitionInfo = Utilities
.getMapWork(hiveConf).getPathToPartitionInfo();
PartitionDesc partDesc = HiveFileFormatUtils
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
index e072ee6..5247ece 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
@@ -23,15 +23,11 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
import java.util.Set;
-import java.util.Map;
import java.util.Arrays;
-import java.util.HashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Utilities;
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
index a9c1614..340d109 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
@@ -68,8 +68,7 @@ public class BucketizedHiveInputFormat<K extends WritableComparable, V extends W
throw new IOException("cannot find class " + inputFormatClassName);
}
- pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath()
- .toString(), hsplit.getPath().toUri().getPath());
+ pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath());
InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
index 49b03a4..e91064b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
@@ -127,7 +127,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
private String inputFormatClassName;
private CombineFileSplit inputSplitShim;
- private Map<String, PartitionDesc> pathToPartitionInfo;
+ private Map<Path, PartitionDesc> pathToPartitionInfo;
public CombineHiveInputSplit() throws IOException {
this(ShimLoader.getHadoopShims().getCombineFileInputFormat()
@@ -142,7 +142,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
this(job, inputSplitShim, null);
}
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
- Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
+ Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
this.inputSplitShim = inputSplitShim;
this.pathToPartitionInfo = pathToPartitionInfo;
if (job != null) {
@@ -262,7 +262,6 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
@Override
public void write(DataOutput out) throws IOException {
inputSplitShim.write(out);
-
if (inputFormatClassName == null) {
if (pathToPartitionInfo == null) {
pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
@@ -319,10 +318,10 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
* Create Hive splits based on CombineFileSplit.
*/
private InputSplit[] getCombineSplits(JobConf job, int numSplits,
- Map<String, PartitionDesc> pathToPartitionInfo)
+ Map<Path, PartitionDesc> pathToPartitionInfo)
throws IOException {
init(job);
- Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+ Map<Path, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
Map<String, Operator<? extends OperatorDesc>> aliasToWork =
mrwork.getAliasToWork();
CombineFileInputFormatShim combine = ShimLoader.getHadoopShims()
@@ -542,7 +541,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
if (combinablePaths.size() > 0) {
FileInputFormat.setInputPaths(job, combinablePaths.toArray
(new Path[combinablePaths.size()]));
- Map<String, PartitionDesc> pathToPartitionInfo = this.pathToPartitionInfo != null ?
+ Map<Path, PartitionDesc> pathToPartitionInfo = this.pathToPartitionInfo != null ?
this.pathToPartitionInfo : Utilities.getMapWork(job).getPathToPartitionInfo();
InputSplit[] splits = getCombineSplits(job, numSplits, pathToPartitionInfo);
for (InputSplit split : splits) {
@@ -587,8 +586,8 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
HashMap<String, SplitSample> nameToSamples = mrwork.getNameToSplitSample();
List<CombineFileSplit> retLists = new ArrayList<CombineFileSplit>();
Map<String, ArrayList<CombineFileSplit>> aliasToSplitList = new HashMap<String, ArrayList<CombineFileSplit>>();
- Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
- Map<String, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases);
+ Map<Path, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+ Map<Path, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases);
// Populate list of exclusive splits for every sampled alias
//
@@ -657,10 +656,10 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
return retLists;
}
- Map<String, ArrayList<String>> removeScheme(Map<String, ArrayList<String>> pathToAliases) {
- Map<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
- for (Map.Entry <String, ArrayList<String>> entry : pathToAliases.entrySet()) {
- String newKey = new Path(entry.getKey()).toUri().getPath();
+ Map<Path, ArrayList<String>> removeScheme(Map<Path, ArrayList<String>> pathToAliases) {
+ Map<Path, ArrayList<String>> result = new HashMap<>();
+ for (Map.Entry <Path, ArrayList<String>> entry : pathToAliases.entrySet()) {
+ Path newKey = Path.getPathWithoutSchemeAndAuthority(entry.getKey());
result.put(newKey, entry.getValue());
}
return result;
@@ -688,9 +687,7 @@ public class CombineHiveInputFormat<K extends WritableComparable, V extends Writ
throw new IOException("cannot find class " + inputFormatClassName);
}
- pushProjectionsAndFilters(job, inputFormatClass,
- hsplit.getPath(0).toString(),
- hsplit.getPath(0).toUri().getPath());
+ pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath(0));
return ShimLoader.getHadoopShims().getCombineFileInputFormat()
.getRecordReader(job,
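For reference (not part of the patch): removeScheme above now relies on
Path.getPathWithoutSchemeAndAuthority instead of round-tripping the key
through a URI. The static helper keeps only the path component:

    Path full = new Path("hdfs://nameservice1:8020/user/hive/warehouse/src");
    Path bare = Path.getPathWithoutSchemeAndAuthority(full);
    System.out.println(bare);  // prints /user/hive/warehouse/src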
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
index 4a05a62..d602c76 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
@@ -66,7 +66,7 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
private boolean wasUsingSortedSearch = false;
private String genericUDFClassName = null;
private final List<Comparison> stopComparisons = new ArrayList<Comparison>();
- private Map<String, PartitionDesc> pathToPartitionInfo;
+ private Map<Path, PartitionDesc> pathToPartitionInfo;
protected RecordReader recordReader;
protected JobConf jobConf;
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 6bb5efa..7727114 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -23,6 +23,7 @@ import java.nio.file.FileSystemNotFoundException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
@@ -37,6 +38,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
@@ -349,33 +351,32 @@ public final class HiveFileFormatUtils {
}
public static PartitionDesc getPartitionDescFromPathRecursively(
- Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
- Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap)
+ Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
+ Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap)
throws IOException {
return getPartitionDescFromPathRecursively(pathToPartitionInfo, dir,
cacheMap, false);
}
public static PartitionDesc getPartitionDescFromPathRecursively(
- Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
- Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap,
- boolean ignoreSchema) throws IOException {
+ Map<Path, PartitionDesc> pathToPartitionInfo, Path dir,
+ Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cacheMap, boolean ignoreSchema)
+ throws IOException {
PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
if (part == null
&& (ignoreSchema
|| (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim().equals(""))
- || pathsContainNoScheme(pathToPartitionInfo))) {
+ || FileUtils.pathsContainNoScheme(pathToPartitionInfo.keySet()))) {
- Map<String, PartitionDesc> newPathToPartitionInfo = null;
+ Map<Path, PartitionDesc> newPathToPartitionInfo = null;
if (cacheMap != null) {
newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
}
if (newPathToPartitionInfo == null) { // still null
- newPathToPartitionInfo = new HashMap<String, PartitionDesc>();
- populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo);
+ newPathToPartitionInfo = populateNewPartitionDesc(pathToPartitionInfo);
if (cacheMap != null) {
cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
@@ -391,64 +392,32 @@ public final class HiveFileFormatUtils {
}
}
- private static boolean pathsContainNoScheme(Map<String, PartitionDesc> pathToPartitionInfo) {
-
- for( Entry<String, PartitionDesc> pe : pathToPartitionInfo.entrySet()){
- if(new Path(pe.getKey()).toUri().getScheme() != null){
- return false;
- }
- }
- return true;
-
- }
-
- private static void populateNewPartitionDesc(
- Map<String, PartitionDesc> pathToPartitionInfo,
- Map<String, PartitionDesc> newPathToPartitionInfo) {
- for (Map.Entry<String, PartitionDesc> entry: pathToPartitionInfo.entrySet()) {
- String entryKey = entry.getKey();
+ private static Map<Path, PartitionDesc> populateNewPartitionDesc(Map<Path, PartitionDesc> pathToPartitionInfo) {
+ Map<Path, PartitionDesc> newPathToPartitionInfo = new HashMap<>();
+ for (Map.Entry<Path, PartitionDesc> entry: pathToPartitionInfo.entrySet()) {
PartitionDesc partDesc = entry.getValue();
- Path newP = new Path(entryKey);
- String pathOnly = newP.toUri().getPath();
+ Path pathOnly = Path.getPathWithoutSchemeAndAuthority(entry.getKey());
newPathToPartitionInfo.put(pathOnly, partDesc);
}
+ return newPathToPartitionInfo;
}
private static PartitionDesc doGetPartitionDescFromPath(
- Map<String, PartitionDesc> pathToPartitionInfo, Path dir) {
+ Map<Path, PartitionDesc> pathToPartitionInfo, Path dir) {
+
// We first do an exact match, and then prefix matching. The latter is needed because the input
// dir could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of the partition spec
- String dirPath = dir.toUri().getPath();
- PartitionDesc part = pathToPartitionInfo.get(dir.toString());
- if (part == null) {
- // LOG.warn("exact match not found, try ripping input path's theme and authority");
- part = pathToPartitionInfo.get(dirPath);
+ Path path = FileUtils.getParentRegardlessOfScheme(dir,pathToPartitionInfo.keySet());
+
+ if(path == null) {
+ // FIXME: old implementation returned null; exception maybe?
+ return null;
}
-
- if (part == null) {
- Path curPath = new Path(dir.toUri().getPath()).getParent();
- dir = dir.getParent();
- while (dir != null) {
-
- // first try full match
- part = pathToPartitionInfo.get(dir.toString());
- if (part == null) {
-
- // exact match not found, try ripping input path's scheme and authority
- part = pathToPartitionInfo.get(curPath.toString());
- }
- if (part != null) {
- break;
- }
- dir = dir.getParent();
- curPath = curPath.getParent();
- }
- }
- return part;
+ return pathToPartitionInfo.get(path);
}
- private static boolean foundAlias(Map<String, ArrayList<String>> pathToAliases,
- String path) {
+ private static boolean foundAlias(Map<Path, ArrayList<String>> pathToAliases,
+ Path path) {
List<String> aliases = pathToAliases.get(path);
if ((aliases == null) || (aliases.isEmpty())) {
return false;
@@ -456,40 +425,29 @@ public final class HiveFileFormatUtils {
return true;
}
- private static String getMatchingPath(Map<String, ArrayList<String>> pathToAliases,
+ private static Path getMatchingPath(Map<Path, ArrayList<String>> pathToAliases,
Path dir) {
// First find the path to be searched
- String path = dir.toString();
+ Path path = dir;
if (foundAlias(pathToAliases, path)) {
return path;
}
- String dirPath = dir.toUri().getPath();
- if(Shell.WINDOWS){
- //temp hack
- //do this to get rid of "/" before the drive letter in windows
- dirPath = new Path(dirPath).toString();
- }
+ Path dirPath = Path.getPathWithoutSchemeAndAuthority(dir);
if (foundAlias(pathToAliases, dirPath)) {
return dirPath;
}
- path = dirPath;
-
- String dirStr = dir.toString();
- int dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR);
- int dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR);
- while (dirPathIndex >= 0 && dirStrIndex >= 0) {
- dirStr = dirStr.substring(0, dirStrIndex);
- dirPath = dirPath.substring(0, dirPathIndex);
+
+ while (path!=null && dirPath!=null) {
+ path=path.getParent();
+ dirPath=dirPath.getParent();
//first try full match
- if (foundAlias(pathToAliases, dirStr)) {
- return dirStr;
+ if (foundAlias(pathToAliases, path)) {
+ return path;
}
if (foundAlias(pathToAliases, dirPath)) {
return dirPath;
}
- dirPathIndex = dirPath.lastIndexOf(Path.SEPARATOR);
- dirStrIndex = dirStr.lastIndexOf(Path.SEPARATOR);
}
return null;
}
@@ -501,7 +459,7 @@ public final class HiveFileFormatUtils {
* @param dir The path to look for
**/
public static List<Operator<? extends OperatorDesc>> doGetWorksFromPath(
- Map<String, ArrayList<String>> pathToAliases,
+ Map<Path, ArrayList<String>> pathToAliases,
Map<String, Operator<? extends OperatorDesc>> aliasToWork, Path dir) {
List<Operator<? extends OperatorDesc>> opList =
new ArrayList<Operator<? extends OperatorDesc>>();
@@ -519,12 +477,12 @@ public final class HiveFileFormatUtils {
* @param dir The path to look for
**/
public static List<String> doGetAliasesFromPath(
- Map<String, ArrayList<String>> pathToAliases,
+ Map<Path, ArrayList<String>> pathToAliases,
Path dir) {
if (pathToAliases == null) {
return new ArrayList<String>();
}
- String path = getMatchingPath(pathToAliases, dir);
+ Path path = getMatchingPath(pathToAliases, dir);
return pathToAliases.get(path);
}
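The long parent-walking loop removed above is collapsed into
FileUtils.getParentRegardlessOfScheme, a helper added elsewhere in this
commit (its FileUtils.java hunk is not part of this excerpt). From this call
site, its contract is roughly the following sketch: walk from dir upward and
return the first ancestor found in the candidate key set, trying each level
both with and without scheme and authority, or null if nothing matches.

    // Reconstructed sketch; the authoritative version is in FileUtils.
    public static Path getParentRegardlessOfScheme(Path path, Collection<Path> candidates) {
      while (path != null) {
        if (candidates.contains(path)) {
          return path;
        }
        Path bare = Path.getPathWithoutSchemeAndAuthority(path);
        if (candidates.contains(bare)) {
          return bare;
        }
        path = path.getParent();
      }
      return null;  // matches the old behavior of returning null (see FIXME above)
    }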
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 227a051..945b828 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -40,6 +40,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
@@ -92,7 +93,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
private JobConf job;
// both classes access by subclasses
- protected Map<String, PartitionDesc> pathToPartitionInfo;
+ protected Map<Path, PartitionDesc> pathToPartitionInfo;
protected MapWork mrwork;
/**
@@ -289,14 +290,13 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
}
boolean nonNative = false;
- PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath().toString());
+ PartitionDesc part = pathToPartitionInfo.get(hsplit.getPath());
if ((part != null) && (part.getTableDesc() != null)) {
Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), job);
nonNative = part.getTableDesc().isNonNative();
}
- pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath()
- .toString(), hsplit.getPath().toUri().getPath(), nonNative);
+ pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath(), nonNative);
InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
RecordReader innerReader = null;
@@ -376,12 +376,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
Path[] getInputPaths(JobConf job) throws IOException {
Path[] dirs;
if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
- Set<String> pathStrings = mrwork.getPathToPartitionInfo().keySet();
- dirs = new Path[pathStrings.size()];
- Iterator<String> it = pathStrings.iterator();
- for (int i = 0; i < dirs.length; i++) {
- dirs[i] = new Path(it.next());
- }
+ dirs = mrwork.getPathToPartitionInfo().keySet().toArray(new Path[]{});
} else {
dirs = FileInputFormat.getInputPaths(job);
if (dirs.length == 0) {
@@ -427,7 +422,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
TableDesc table = part.getTableDesc();
TableScanOperator tableScan = null;
- List<String> aliases = mrwork.getPathToAliases().get(dir.toString());
+ List<String> aliases = mrwork.getPathToAliases().get(dir);
// Make filter pushdown information available to getSplits.
if ((aliases != null) && (aliases.size() == 1)) {
@@ -518,12 +513,13 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
}
}
+
protected static PartitionDesc getPartitionDescFromPath(
- Map<String, PartitionDesc> pathToPartitionInfo, Path dir)
+ Map<Path, PartitionDesc> pathToPartitionInfo, Path dir)
throws IOException {
- PartitionDesc partDesc = pathToPartitionInfo.get(dir.toString());
+ PartitionDesc partDesc = pathToPartitionInfo.get(dir);
if (partDesc == null) {
- partDesc = pathToPartitionInfo.get(dir.toUri().getPath());
+ partDesc = pathToPartitionInfo.get(Path.getPathWithoutSchemeAndAuthority(dir));
}
if (partDesc == null) {
throw new IOException("cannot find dir = " + dir.toString()
@@ -585,31 +581,13 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
}
protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
- String splitPath, String splitPathWithNoSchema) {
- pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath,
- splitPathWithNoSchema, false);
- }
-
- private static boolean isMatch(String splitPath, String key) {
- if (splitPath.equals(key)) {
- return true;
- }
- // Take care of these cases:
- // splitPath: hdfs://ns/user/hive/warehouse/src/data.txt
- // key: [hdfs://ns]/user/hive/warehouse/src
- // [hdfs://ns]/user/hive/warehouse/src_2
- // [hdfs://ns]/user/hive/warehouse/src/
- // [hdfs://ns]/user/hive/warehouse/src/data.txt
- key = StringUtils.removeEnd(key, "/");
- int index = splitPath.indexOf(key);
- if (index == -1) {
- return false;
- }
- return splitPath.substring(index).equals(key) || splitPath.charAt(index+key.length()) == '/';
+ Path splitPath) {
+ pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, false);
}
protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
- String splitPath, String splitPathWithNoSchema, boolean nonNative) {
+ Path splitPath, boolean nonNative) {
+ Path splitPathWithNoSchema = Path.getPathWithoutSchemeAndAuthority(splitPath);
if (this.mrwork == null) {
init(job);
}
@@ -619,12 +597,12 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
}
ArrayList<String> aliases = new ArrayList<String>();
- Iterator<Entry<String, ArrayList<String>>> iterator = this.mrwork
+ Iterator<Entry<Path, ArrayList<String>>> iterator = this.mrwork
.getPathToAliases().entrySet().iterator();
while (iterator.hasNext()) {
- Entry<String, ArrayList<String>> entry = iterator.next();
- String key = entry.getKey();
+ Entry<Path, ArrayList<String>> entry = iterator.next();
+ Path key = entry.getKey();
boolean match;
if (nonNative) {
// For non-native tables, we need to do an exact match to avoid
@@ -637,7 +615,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
// subdirectories. (Unlike non-native tables, prefix mixups don't seem
// to be a potential problem here since we are always dealing with the
// path to something deeper than the table location.)
- match = isMatch(splitPath, key) || isMatch(splitPathWithNoSchema, key);
+ match = FileUtils.isPathWithinSubtree(splitPath, key) || FileUtils.isPathWithinSubtree(splitPathWithNoSchema, key);
}
if (match) {
ArrayList<String> list = entry.getValue();
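The deleted isMatch comment spells out the trap its replacement must avoid: a
plain prefix test lets .../src claim ownership of the sibling .../src_2.
FileUtils.isPathWithinSubtree (also added elsewhere in this commit) does the
containment check on path components instead. A quick demonstration of the
string-prefix trap, illustration only:

    String split = "hdfs://ns/user/hive/warehouse/src_2/data.txt";
    String key   = "hdfs://ns/user/hive/warehouse/src";
    // Naive prefix matching wrongly places src_2 under src:
    System.out.println(split.startsWith(key));  // true, the wrong answer
    // a component-wise check must compare "src_2" against "src" as whole
    // directory names and reject the match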
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java
index f4a21f8..5637270 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.io;
import java.util.HashMap;
import java.util.Map;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
/**
@@ -48,21 +49,21 @@ public class IOPrepareCache {
}
}
- private Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> partitionDescMap;
+ private Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> partitionDescMap;
- public Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> allocatePartitionDescMap() {
+ public Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> allocatePartitionDescMap() {
if (partitionDescMap == null) {
- partitionDescMap = new HashMap<Map<String, PartitionDesc>, Map<String, PartitionDesc>>();
+ partitionDescMap = new HashMap<>();
}
return partitionDescMap;
}
- public Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> getPartitionDescMap() {
+ public Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> getPartitionDescMap() {
return partitionDescMap;
}
public void setPartitionDescMap(
- Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> partitionDescMap) {
+ Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> partitionDescMap) {
this.partitionDescMap = partitionDescMap;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
index 8b49204..55b3b55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SymbolicInputFormat.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -38,28 +39,26 @@ import org.apache.hadoop.mapred.TextInputFormat;
public class SymbolicInputFormat implements ReworkMapredInputFormat {
public void rework(HiveConf job, MapredWork work) throws IOException {
- Map<String, PartitionDesc> pathToParts = work.getMapWork().getPathToPartitionInfo();
- List<String> toRemovePaths = new ArrayList<String>();
- Map<String, PartitionDesc> toAddPathToPart = new HashMap<String, PartitionDesc>();
- Map<String, ArrayList<String>> pathToAliases = work.getMapWork().getPathToAliases();
+ Map<Path, PartitionDesc> pathToParts = work.getMapWork().getPathToPartitionInfo();
+ List<Path> toRemovePaths = new ArrayList<>();
+ Map<Path, PartitionDesc> toAddPathToPart = new HashMap<>();
+ Map<Path, ArrayList<String>> pathToAliases = work.getMapWork().getPathToAliases();
- for (Map.Entry<String, PartitionDesc> pathPartEntry : pathToParts
- .entrySet()) {
- String path = pathPartEntry.getKey();
+ for (Map.Entry<Path, PartitionDesc> pathPartEntry : pathToParts.entrySet()) {
+ Path path = pathPartEntry.getKey();
PartitionDesc partDesc = pathPartEntry.getValue();
// this path points to a symlink path
if (partDesc.getInputFileFormatClass().equals(
SymlinkTextInputFormat.class)) {
// change to TextInputFormat
partDesc.setInputFileFormatClass(TextInputFormat.class);
- Path symlinkDir = new Path(path);
- FileSystem fileSystem = symlinkDir.getFileSystem(job);
- FileStatus fStatus = fileSystem.getFileStatus(symlinkDir);
+ FileSystem fileSystem = path.getFileSystem(job);
+ FileStatus fStatus = fileSystem.getFileStatus(path);
FileStatus[] symlinks = null;
if (!fStatus.isDir()) {
symlinks = new FileStatus[] { fStatus };
} else {
- symlinks = fileSystem.listStatus(symlinkDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
+ symlinks = fileSystem.listStatus(path, FileUtils.HIDDEN_FILES_PATH_FILTER);
}
toRemovePaths.add(path);
ArrayList<String> aliases = pathToAliases.remove(path);
@@ -77,8 +76,9 @@ public class SymbolicInputFormat implements ReworkMapredInputFormat {
// if the line is invalid for any reason, the job will fail.
FileStatus[] matches = fileSystem.globStatus(new Path(line));
for(FileStatus fileStatus :matches) {
- toAddPathToPart.put(fileStatus.getPath().toUri().getPath(), partDesc);
- pathToAliases.put(fileStatus.getPath().toUri().getPath(), aliases);
+ Path schemaLessPath = Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath());
+ toAddPathToPart.put(schemaLessPath, partDesc);
+ pathToAliases.put(schemaLessPath, aliases);
}
}
} finally {
@@ -88,9 +88,11 @@ public class SymbolicInputFormat implements ReworkMapredInputFormat {
}
}
- pathToParts.putAll(toAddPathToPart);
- for (String toRemove : toRemovePaths) {
- pathToParts.remove(toRemove);
+ for (Entry<Path, PartitionDesc> toAdd : toAddPathToPart.entrySet()) {
+ work.getMapWork().addPathToPartitionInfo(toAdd.getKey(), toAdd.getValue());
+ }
+ for (Path toRemove : toRemovePaths) {
+ work.getMapWork().removePathToPartitionInfo(toRemove);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
index 30862c8..4fccfc1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
@@ -109,8 +109,8 @@ public class AvroGenericRecordReader implements
// Iterate over the Path -> Partition descriptions to find the partition
// that matches our input split.
- for (Map.Entry<String,PartitionDesc> pathsAndParts: mapWork.getPathToPartitionInfo().entrySet()){
- String partitionPath = pathsAndParts.getKey();
+ for (Map.Entry<Path,PartitionDesc> pathsAndParts: mapWork.getPathToPartitionInfo().entrySet()){
+ Path partitionPath = pathsAndParts.getKey();
if(pathIsInPartition(split.getPath(), partitionPath)) {
if(LOG.isInfoEnabled()) {
LOG.info("Matching partition " + partitionPath +
@@ -143,13 +143,13 @@ public class AvroGenericRecordReader implements
return null;
}
- private boolean pathIsInPartition(Path split, String partitionPath) {
+ private boolean pathIsInPartition(Path split, Path partitionPath) {
boolean schemeless = split.toUri().getScheme() == null;
if (schemeless) {
- String schemelessPartitionPath = new Path(partitionPath).toUri().getPath();
+ String schemelessPartitionPath = partitionPath.toUri().getPath();
return split.toString().startsWith(schemelessPartitionPath);
} else {
- return split.toString().startsWith(partitionPath);
+ return split.toString().startsWith(partitionPath.toString());
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
index 3339c8d..94b9431 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/merge/MergeFileWork.java
@@ -78,11 +78,8 @@ public class MergeFileWork extends MapWork {
this.internalInputFormat = RCFileBlockMergeInputFormat.class;
}
partDesc.setInputFileFormatClass(internalInputFormat);
- if (this.getPathToPartitionInfo() == null) {
- this.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
- }
for (Path path : this.inputPaths) {
- this.getPathToPartitionInfo().put(path.toString(), partDesc);
+ this.addPathToPartitionInfo(path, partDesc);
}
this.isListBucketingAlterTableConcatenate = false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
index db923fa..b058500 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
@@ -48,8 +48,7 @@ public class ProjectionPusher {
private static final Logger LOG = LoggerFactory.getLogger(ProjectionPusher.class);
- private final Map<String, PartitionDesc> pathToPartitionInfo =
- new LinkedHashMap<String, PartitionDesc>();
+ private final Map<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
/**
* MapWork is the Hive object which describes input files,
* columns projections, and filters.
@@ -66,14 +65,13 @@ public class ProjectionPusher {
if (mapWork == null && plan != null && plan.length() > 0) {
mapWork = Utilities.getMapWork(job);
pathToPartitionInfo.clear();
- for (final Map.Entry<String, PartitionDesc> entry : mapWork.getPathToPartitionInfo().entrySet()) {
+ for (final Map.Entry<Path, PartitionDesc> entry : mapWork.getPathToPartitionInfo().entrySet()) {
// key contains the scheme (such as pfile://) and we want only the path portion; fix tracked in HIVE-6366
- pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath(), entry.getValue());
+ pathToPartitionInfo.put(Path.getPathWithoutSchemeAndAuthority(entry.getKey()), entry.getValue());
}
}
}
- @Deprecated // Uses deprecated methods on ColumnProjectionUtils
private void pushProjectionsAndFilters(final JobConf jobConf,
final String splitPath,
final String splitPathWithNoSchema) {
@@ -85,12 +83,12 @@ public class ProjectionPusher {
}
final Set<String> aliases = new HashSet<String>();
- final Iterator<Entry<String, ArrayList<String>>> iterator =
+ final Iterator<Entry<Path, ArrayList<String>>> iterator =
mapWork.getPathToAliases().entrySet().iterator();
while (iterator.hasNext()) {
- final Entry<String, ArrayList<String>> entry = iterator.next();
- final String key = new Path(entry.getKey()).toUri().getPath();
+ final Entry<Path, ArrayList<String>> entry = iterator.next();
+ final String key = entry.getKey().toUri().getPath();
if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) {
aliases.addAll(entry.getValue());
}
@@ -144,10 +142,10 @@ public class ProjectionPusher {
// push down projections
if (!allColumnsNeeded) {
if (!neededColumnIDs.isEmpty()) {
- ColumnProjectionUtils.appendReadColumnIDs(jobConf, new ArrayList<Integer>(neededColumnIDs));
+ ColumnProjectionUtils.appendReadColumns(jobConf, new ArrayList<Integer>(neededColumnIDs));
}
} else {
- ColumnProjectionUtils.setFullyReadColumns(jobConf);
+ ColumnProjectionUtils.setReadAllColumns(jobConf);
}
pushFilters(jobConf, rowSchema, tableFilterExpr);
@@ -173,12 +171,11 @@ public class ProjectionPusher {
filterExprSerialized);
}
- @Deprecated // Uses deprecated methods on ColumnProjectionUtils
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path)
throws IOException {
updateMrWork(jobConf); // TODO: refactor this in HIVE-6366
final JobConf cloneJobConf = new JobConf(jobConf);
- final PartitionDesc part = pathToPartitionInfo.get(path.toString());
+ final PartitionDesc part = pathToPartitionInfo.get(path);
if ((part != null) && (part.getTableDesc() != null)) {
Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
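Besides the Path migration, this file also drops its @Deprecated markers by
moving off the old ColumnProjectionUtils entry points. Minimal usage of the
replacement calls, illustration only:

    JobConf jobConf = new JobConf();
    // project only columns 0 and 2 ...
    ColumnProjectionUtils.appendReadColumns(jobConf, Arrays.asList(0, 2));
    // ... or declare that every column is needed
    ColumnProjectionUtils.setReadAllColumns(jobConf);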
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java
index c006743..919cea0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanWork.java
@@ -56,11 +56,8 @@ public class PartialScanWork extends MapWork implements Serializable {
this.inputPaths = inputPaths;
PartitionDesc partDesc = new PartitionDesc();
partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class);
- if(this.getPathToPartitionInfo() == null) {
- this.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
- }
for(Path path: this.inputPaths) {
- this.getPathToPartitionInfo().put(path.toString(), partDesc);
+ this.addPathToPartitionInfo(path, partDesc);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java
index d63aa29..80cf5e0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateWork.java
@@ -63,10 +63,7 @@ public class ColumnTruncateWork extends MapWork implements Serializable {
this.dynPartCtx = dynPartCtx;
PartitionDesc partDesc = new PartitionDesc();
partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class);
- if(this.getPathToPartitionInfo() == null) {
- this.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
- }
- this.getPathToPartitionInfo().put(inputDir.toString(), partDesc);
+ this.addPathToPartitionInfo(inputDir, partDesc);
}
public Path getInputDir() {
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index f555741..7fc3354 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -244,11 +244,9 @@ public final class GenMapRedUtils {
TableDesc tt_desc = tt_descLst.get(pos);
MapWork mWork = plan.getMapWork();
if (mWork.getPathToAliases().get(taskTmpDir) == null) {
- mWork.getPathToAliases().put(taskTmpDir,
- new ArrayList<String>());
- mWork.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
- mWork.getPathToPartitionInfo().put(taskTmpDir,
- new PartitionDesc(tt_desc, null));
+ mWork.removePathToAlias(new Path(taskTmpDir));
+ mWork.addPathToAlias(new Path(taskTmpDir),taskTmpDir);
+ mWork.addPathToPartitionInfo(new Path(taskTmpDir), new PartitionDesc(tt_desc, null));
mWork.getAliasToWork().put(taskTmpDir, topOperators.get(pos));
}
}
@@ -704,16 +702,13 @@ public final class GenMapRedUtils {
if (!local) {
while (iterPath.hasNext()) {
assert iterPartnDesc.hasNext();
- String path = iterPath.next().toString();
+ Path path = iterPath.next();
PartitionDesc prtDesc = iterPartnDesc.next();
// Add the path to alias mapping
- if (plan.getPathToAliases().get(path) == null) {
- plan.getPathToAliases().put(path, new ArrayList<String>());
- }
- plan.getPathToAliases().get(path).add(alias_id);
- plan.getPathToPartitionInfo().put(path, prtDesc);
+ plan.addPathToAlias(path,alias_id);
+ plan.addPathToPartitionInfo(path, prtDesc);
if (LOG.isDebugEnabled()) {
LOG.debug("Information added for path " + path);
}
@@ -761,7 +756,7 @@ public final class GenMapRedUtils {
* table descriptor
* @throws SerDeException
*/
- public static void setTaskPlan(String path, String alias,
+ public static void setTaskPlan(Path path, String alias,
Operator<? extends OperatorDesc> topOp, MapWork plan, boolean local,
TableDesc tt_desc) throws SemanticException {
@@ -780,11 +775,8 @@ public final class GenMapRedUtils {
}
if (!local) {
- if (plan.getPathToAliases().get(path) == null) {
- plan.getPathToAliases().put(path, new ArrayList<String>());
- }
- plan.getPathToAliases().get(path).add(alias);
- plan.getPathToPartitionInfo().put(path, new PartitionDesc(tt_desc, null));
+ plan.addPathToAlias(path,alias);
+ plan.addPathToPartitionInfo(path, new PartitionDesc(tt_desc, null));
plan.getAliasToWork().put(alias, topOp);
} else {
// populate local work if needed
@@ -982,8 +974,8 @@ public final class GenMapRedUtils {
conf.getBoolVar(
HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
- work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
- work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
+ work.setPathToAliases(new LinkedHashMap<Path, ArrayList<String>>());
+ work.setPathToPartitionInfo(new LinkedHashMap<Path, PartitionDesc>());
work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
return mrWork;
}
@@ -1133,7 +1125,7 @@ public final class GenMapRedUtils {
}
// Add the path to alias mapping
- setTaskPlan(taskTmpDir.toUri().toString(), streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
+ setTaskPlan(taskTmpDir, streamDesc, tableScanOp, cplan.getMapWork(), false, tt_desc);
opProcCtx.setCurrTopOp(null);
opProcCtx.setCurrAliasId(null);
opProcCtx.setCurrTask(childTask);
@@ -1161,13 +1153,13 @@ public final class GenMapRedUtils {
*/
public static void replaceMapWork(String sourceAlias, String targetAlias,
MapWork source, MapWork target) {
- Map<String, ArrayList<String>> sourcePathToAliases = source.getPathToAliases();
- Map<String, PartitionDesc> sourcePathToPartitionInfo = source.getPathToPartitionInfo();
+ Map<Path, ArrayList<String>> sourcePathToAliases = source.getPathToAliases();
+ Map<Path, PartitionDesc> sourcePathToPartitionInfo = source.getPathToPartitionInfo();
Map<String, Operator<? extends OperatorDesc>> sourceAliasToWork = source.getAliasToWork();
Map<String, PartitionDesc> sourceAliasToPartnInfo = source.getAliasToPartnInfo();
- Map<String, ArrayList<String>> targetPathToAliases = target.getPathToAliases();
- Map<String, PartitionDesc> targetPathToPartitionInfo = target.getPathToPartitionInfo();
+ LinkedHashMap<Path, ArrayList<String>> targetPathToAliases = target.getPathToAliases();
+ LinkedHashMap<Path, PartitionDesc> targetPathToPartitionInfo = target.getPathToPartitionInfo();
Map<String, Operator<? extends OperatorDesc>> targetAliasToWork = target.getAliasToWork();
Map<String, PartitionDesc> targetAliasToPartnInfo = target.getAliasToPartnInfo();
@@ -1188,15 +1180,15 @@ public final class GenMapRedUtils {
// Remove unnecessary information from target
targetAliasToWork.remove(targetAlias);
targetAliasToPartnInfo.remove(targetAlias);
- List<String> pathsToRemove = new ArrayList<String>();
- for (Entry<String, ArrayList<String>> entry: targetPathToAliases.entrySet()) {
+ List<Path> pathsToRemove = new ArrayList<>();
+ for (Entry<Path, ArrayList<String>> entry: targetPathToAliases.entrySet()) {
ArrayList<String> aliases = entry.getValue();
aliases.remove(targetAlias);
if (aliases.isEmpty()) {
pathsToRemove.add(entry.getKey());
}
}
- for (String pathToRemove: pathsToRemove) {
+ for (Path pathToRemove: pathsToRemove) {
targetPathToAliases.remove(pathToRemove);
targetPathToPartitionInfo.remove(pathToRemove);
}
@@ -1205,19 +1197,21 @@ public final class GenMapRedUtils {
targetAliasToWork.put(sourceAlias, sourceAliasToWork.get(sourceAlias));
targetAliasToPartnInfo.putAll(sourceAliasToPartnInfo);
targetPathToPartitionInfo.putAll(sourcePathToPartitionInfo);
- List<String> pathsToAdd = new ArrayList<String>();
- for (Entry<String, ArrayList<String>> entry: sourcePathToAliases.entrySet()) {
+ List<Path> pathsToAdd = new ArrayList<>();
+ for (Entry<Path, ArrayList<String>> entry: sourcePathToAliases.entrySet()) {
ArrayList<String> aliases = entry.getValue();
if (aliases.contains(sourceAlias)) {
pathsToAdd.add(entry.getKey());
}
}
- for (String pathToAdd: pathsToAdd) {
+ for (Path pathToAdd: pathsToAdd) {
if (!targetPathToAliases.containsKey(pathToAdd)) {
targetPathToAliases.put(pathToAdd, new ArrayList<String>());
}
targetPathToAliases.get(pathToAdd).add(sourceAlias);
}
+ target.setPathToAliases(targetPathToAliases);
+ target.setPathToPartitionInfo(targetPathToPartitionInfo);
}
/**
@@ -1535,16 +1529,16 @@ public final class GenMapRedUtils {
TableScanOperator topOp, FileSinkDesc fsDesc) {
ArrayList<String> aliases = new ArrayList<String>();
- String inputDir = fsDesc.getFinalDirName().toString();
+ Path inputDir = fsDesc.getFinalDirName();
TableDesc tblDesc = fsDesc.getTableInfo();
- aliases.add(inputDir); // dummy alias: just use the input path
+ aliases.add(inputDir.toString()); // dummy alias: just use the input path
// constructing the default MapredWork
MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
MapWork cplan = cMrPlan.getMapWork();
- cplan.getPathToAliases().put(inputDir, aliases);
- cplan.getPathToPartitionInfo().put(inputDir, new PartitionDesc(tblDesc, null));
- cplan.getAliasToWork().put(inputDir, topOp);
+ cplan.addPathToAlias(inputDir, aliases);
+ cplan.addPathToPartitionInfo(inputDir, new PartitionDesc(tblDesc, null));
+ cplan.getAliasToWork().put(inputDir.toString(), topOp);
cplan.setMapperCannotSpanPartns(true);
return cplan;
@@ -1591,14 +1585,13 @@ public final class GenMapRedUtils {
// create the merge file work
MergeFileWork work = new MergeFileWork(inputDirs, finalName,
hasDynamicPartitions, tblDesc.getInputFileFormatClass().getName());
- LinkedHashMap<String, ArrayList<String>> pathToAliases =
- new LinkedHashMap<String, ArrayList<String>>();
- pathToAliases.put(inputDir.toString(), inputDirstr);
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
+ pathToAliases.put(inputDir, inputDirstr);
work.setMapperCannotSpanPartns(true);
work.setPathToAliases(pathToAliases);
PartitionDesc pDesc = new PartitionDesc(tblDesc, null);
pDesc.setInputFileFormatClass(internalIFClass);
- work.getPathToPartitionInfo().put(inputDir.toString(), pDesc);
+ work.addPathToPartitionInfo(inputDir, pDesc);
work.setListBucketingCtx(fsInputDesc.getLbCtx());
// create alias to work which contains the merge operator
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
index e577e19..c6efd5b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
@@ -146,13 +146,13 @@ public class MapJoinProcessor extends Transform {
smallTableAliasList.add(alias);
// get input path and remove this alias from pathToAlias
// because this file will be fetched by fetch operator
- LinkedHashMap<String, ArrayList<String>> pathToAliases = newWork.getMapWork().getPathToAliases();
+ LinkedHashMap<Path, ArrayList<String>> pathToAliases = newWork.getMapWork().getPathToAliases();
// keep record all the input path for this alias
- HashSet<String> pathSet = new HashSet<String>();
- HashSet<String> emptyPath = new HashSet<String>();
- for (Map.Entry<String, ArrayList<String>> entry2 : pathToAliases.entrySet()) {
- String path = entry2.getKey();
+ HashSet<Path> pathSet = new HashSet<>();
+ HashSet<Path> emptyPath = new HashSet<>();
+ for (Map.Entry<Path, ArrayList<String>> entry2 : pathToAliases.entrySet()) {
+ Path path = entry2.getKey();
ArrayList<String> list = entry2.getValue();
if (list.contains(alias)) {
// add to path set
@@ -165,8 +165,8 @@ public class MapJoinProcessor extends Transform {
}
}
//remove the path, with which no alias associates
- for (String path : emptyPath) {
- pathToAliases.remove(path);
+ for (Path path : emptyPath) {
+ newWork.getMapWork().removePathToAlias(path);
}
// create fetch work
@@ -174,15 +174,15 @@ public class MapJoinProcessor extends Transform {
List<Path> partDir = new ArrayList<Path>();
List<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
- for (String tablePath : pathSet) {
+ for (Path tablePath : pathSet) {
PartitionDesc partitionDesc = newWork.getMapWork().getPathToPartitionInfo().get(tablePath);
// create fetchwork for non partitioned table
if (partitionDesc.getPartSpec() == null || partitionDesc.getPartSpec().size() == 0) {
- fetchWork = new FetchWork(new Path(tablePath), partitionDesc.getTableDesc());
+ fetchWork = new FetchWork(tablePath, partitionDesc.getTableDesc());
break;
}
// if table is partitioned,add partDir and partitionDesc
- partDir.add(new Path(tablePath));
+ partDir.add(tablePath);
partDesc.add(partitionDesc);
}
// create fetchwork for partitioned table
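Worth noting in this hunk: with Path keys, both the pathToPartitionInfo lookup and the FetchWork constructor consume the Path directly, so the old new Path(tablePath) re-parse disappears. A hedged sketch of the non-partitioned branch, assuming hive-exec on the classpath (the method name is invented; the calls are the ones above, and the partitioned branch is elided):
    import java.util.Set;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.plan.FetchWork;
    import org.apache.hadoop.hive.ql.plan.MapWork;
    import org.apache.hadoop.hive.ql.plan.PartitionDesc;

    // Sketch only: return a FetchWork for the first non-partitioned table
    // directory found, as the loop above does.
    static FetchWork nonPartitionedFetchWork(MapWork mapWork, Set<Path> pathSet) {
      for (Path tablePath : pathSet) {
        PartitionDesc pd = mapWork.getPathToPartitionInfo().get(tablePath);
        if (pd.getPartSpec() == null || pd.getPartSpec().isEmpty()) {
          return new FetchWork(tablePath, pd.getTableDesc());
        }
      }
      return null; // partitioned case: collect partDir/partDesc lists instead
    }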
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java
index a3a7f42..81b527c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/AbstractJoinTaskDispatcher.java
@@ -25,6 +25,7 @@ import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -118,7 +119,7 @@ public abstract class AbstractJoinTaskDispatcher implements Dispatcher {
}
public long getTotalKnownInputSize(Context context, MapWork currWork,
- Map<String, ArrayList<String>> pathToAliases,
+ Map<Path, ArrayList<String>> pathToAliases,
HashMap<String, Long> aliasToSize) throws SemanticException {
try {
// go over all the input paths, and calculate a known total size, known
@@ -129,8 +130,8 @@ public abstract class AbstractJoinTaskDispatcher implements Dispatcher {
// is chosen as big table, what's the total size of left tables, which
// are going to be small tables.
long aliasTotalKnownInputSize = 0L;
- for (Map.Entry<String, ArrayList<String>> entry : pathToAliases.entrySet()) {
- String path = entry.getKey();
+ for (Map.Entry<Path, ArrayList<String>> entry : pathToAliases.entrySet()) {
+ Path path = entry.getKey();
List<String> aliasList = entry.getValue();
ContentSummary cs = context.getCS(path);
if (cs != null) {
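The change above is mechanical, but note that context.getCS(...) now receives the Path itself. A hedged sketch of the whole accounting loop, assuming hive-exec on the classpath (the helper name is invented and the bookkeeping is simplified relative to the real method):
    import java.util.ArrayList;
    import java.util.Map;
    import org.apache.hadoop.fs.ContentSummary;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.Context;

    // Sketch only: sum the known total input size over Path-keyed inputs
    // and credit each alias with the length of every path it reads from.
    static long totalKnownInputSize(Context context,
        Map<Path, ArrayList<String>> pathToAliases,
        Map<String, Long> aliasToSize) throws Exception {
      long total = 0L;
      for (Map.Entry<Path, ArrayList<String>> entry : pathToAliases.entrySet()) {
        ContentSummary cs = context.getCS(entry.getKey());
        if (cs == null) {
          continue; // unknown size: leave this path out of the estimate
        }
        total += cs.getLength();
        for (String alias : entry.getValue()) {
          Long prev = aliasToSize.get(alias);
          aliasToSize.put(alias,
              prev == null ? cs.getLength() : prev + cs.getLength());
        }
      }
      return total;
    }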
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
index 9ea0857..f7f6d1e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
@@ -31,6 +31,7 @@ import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
@@ -233,9 +234,8 @@ public class CommonJoinTaskDispatcher extends AbstractJoinTaskDispatcher impleme
throw new SemanticException("Cannot find the " + FileSinkOperator.getOperatorName() +
" operator at the last operator of the MapJoin Task.");
}
-
// The mapJoinTaskFileSinkOperator writes to a different directory
- String childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName().toString();
+ Path childMRPath = mapJoinTaskFileSinkOperator.getConf().getDirName();
List<String> childMRAliases = childMapWork.getPathToAliases().get(childMRPath);
if (childMRAliases == null || childMRAliases.size() != 1) {
return;
@@ -243,8 +243,8 @@ public class CommonJoinTaskDispatcher extends AbstractJoinTaskDispatcher impleme
String childMRAlias = childMRAliases.get(0);
// Sanity check to make sure there is no alias conflict after merge.
- for (Entry<String, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
- String path = entry.getKey();
+ for (Entry<Path, ArrayList<String>> entry : childMapWork.getPathToAliases().entrySet()) {
+ Path path = entry.getKey();
List<String> aliases = entry.getValue();
if (path.equals(childMRPath)) {
@@ -299,7 +299,7 @@ public class CommonJoinTaskDispatcher extends AbstractJoinTaskDispatcher impleme
TableScanOperator childMRTaskTableScanOperator =
OperatorUtils.findSingleOperator(
- childMapWork.getAliasToWork().get(childMRAlias), TableScanOperator.class);
+ childMapWork.getAliasToWork().get(childMRAlias.toString()), TableScanOperator.class);
if (childMRTaskTableScanOperator == null) {
throw new SemanticException("Expected a " + TableScanOperator.getOperatorName() +
" operator as the work associated with alias " + childMRAlias +
@@ -323,7 +323,7 @@ public class CommonJoinTaskDispatcher extends AbstractJoinTaskDispatcher impleme
childInChildMRTask.replaceParent(childMRTaskTableScanOperator, parentInMapJoinTask);
// Step 2.2: Replace the corresponding part childMRWork's MapWork.
- GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias, mapJoinMapWork, childMapWork);
+ GenMapRedUtils.replaceMapWork(mapJoinAlias, childMRAlias.toString(), mapJoinMapWork, childMapWork);
// Step 2.3: Fill up stuff in local work
if (mapJoinLocalWork != null) {
@@ -394,7 +394,7 @@ public class CommonJoinTaskDispatcher extends AbstractJoinTaskDispatcher impleme
// Must be deterministic order map for consistent q-test output across Java versions
HashMap<Task<? extends Serializable>, Set<String>> taskToAliases =
new LinkedHashMap<Task<? extends Serializable>, Set<String>>();
- HashMap<String, ArrayList<String>> pathToAliases = currWork.getPathToAliases();
+ HashMap<Path, ArrayList<String>> pathToAliases = currWork.getPathToAliases();
Map<String, Operator<? extends OperatorDesc>> aliasToWork = currWork.getAliasToWork();
// get parseCtx for this Join Operator
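The lookups in these hunks (e.g. getPathToAliases().get(childMRPath)) are exactly where String/Path mixing used to bite: a String key never equals a Path key, so a mismatched get() silently returns null. A standalone demo of that failure mode, and of why the toString() round-trip stays safe, assuming only hadoop-common on the classpath (the class name and directory are invented):
    import java.util.LinkedHashMap;
    import org.apache.hadoop.fs.Path;

    public class KeyTypeDemo {
      public static void main(String[] args) {
        LinkedHashMap<Object, String> m = new LinkedHashMap<>();
        Path dir = new Path("/tmp/-ext-10002");
        m.put(dir, "alias1");
        // A String never equals a Path, so mixed key types miss silently:
        System.out.println(m.get(dir.toString()));             // null
        System.out.println(m.get(dir));                        // alias1
        // Round-tripping through toString() yields an equal Path:
        System.out.println(new Path(dir.toString()).equals(dir)); // true
      }
    }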
http://git-wip-us.apache.org/repos/asf/hive/blob/47b5b5cd/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
index f41fa4e..ede4fcb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
@@ -264,12 +264,12 @@ public final class GenMRSkewJoinProcessor {
String alias = src.toString();
aliases.add(alias);
Path bigKeyDirPath = bigKeysDirMap.get(src);
- newPlan.getPathToAliases().put(bigKeyDirPath.toString(), aliases);
+ newPlan.addPathToAlias(bigKeyDirPath, aliases);
newPlan.getAliasToWork().put(alias, tblScan_op);
PartitionDesc part = new PartitionDesc(tableDescList.get(src), null);
- newPlan.getPathToPartitionInfo().put(bigKeyDirPath.toString(), part);
+ newPlan.addPathToPartitionInfo(bigKeyDirPath, part);
newPlan.getAliasToPartnInfo().put(alias, part);
Operator<? extends OperatorDesc> reducer = clonePlan.getReduceWork().getReducer();
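The four-map wiring above (path-to-aliases, alias-to-work, path-to-partition-info, alias-to-partition-info) recurs across the skew-join hunks in this patch. A hedged sketch factoring it into one helper, assuming hive-exec on the classpath (the helper and its parameter names are invented; the MapWork calls are the ones used above):
    import java.util.ArrayList;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.plan.MapWork;
    import org.apache.hadoop.hive.ql.plan.OperatorDesc;
    import org.apache.hadoop.hive.ql.plan.PartitionDesc;

    // Sketch only: the paired path-keyed and alias-keyed updates from the
    // hunk above, factored into one place.
    static void wireAlias(MapWork plan, Path dir, String alias,
        Operator<? extends OperatorDesc> work, PartitionDesc part) {
      ArrayList<String> aliases = new ArrayList<>();
      aliases.add(alias);
      plan.addPathToAlias(dir, aliases);           // path  -> aliases
      plan.getAliasToWork().put(alias, work);      // alias -> operator tree
      plan.addPathToPartitionInfo(dir, part);      // path  -> partition desc
      plan.getAliasToPartnInfo().put(alias, part); // alias -> partition desc
    }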