Posted to commits@hive.apache.org by se...@apache.org on 2015/10/12 19:51:09 UTC
[01/10] hive git commit: HIVE-11976: Extend CBO rules to being able to apply rules only once on a given operator (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Repository: hive
Updated Branches:
refs/heads/llap 1d9574ad0 -> 4e53bfd10
HIVE-11976: Extend CBO rules to being able to apply rules only once on a given operator (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5201f188
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5201f188
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5201f188
Branch: refs/heads/llap
Commit: 5201f188bc6c808c2a9f3d100118340af3ebd7c4
Parents: aded0d3
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Oct 5 12:12:39 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Oct 9 09:01:48 2015 +0100
----------------------------------------------------------------------
.../ql/optimizer/calcite/HiveConfigContext.java | 37 ----
.../calcite/HiveHepPlannerContext.java | 37 ++++
.../calcite/HiveVolcanoPlannerContext.java | 37 ++++
.../calcite/cost/HiveVolcanoPlanner.java | 6 +-
.../calcite/rules/HivePreFilteringRule.java | 42 ++++-
.../calcite/rules/HiveRulesRegistry.java | 44 +++++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 11 +-
.../calcite/TestCBORuleFiredOnlyOnce.java | 168 +++++++++++++++++++
8 files changed, 332 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
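In brief, the fire-once mechanism below works as follows: a HiveRulesRegistry reaches each rule through the planner's Context (HiveHepPlannerContext for the HepPlanner; HiveVolcanoPlannerContext replaces the old HiveConfigContext on the Volcano side), a rule vetoes itself in matches() when the operator is already registered, and onMatch() records the operator plus any operators it creates. A minimal sketch of the pattern, condensed from the diff; FireOnceRule is an illustrative name, while the registry and context calls are the ones the patch introduces:

import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;

public class FireOnceRule extends RelOptRule {

  public FireOnceRule() {
    super(operand(RelNode.class, any()));
  }

  @Override
  public boolean matches(RelOptRuleCall call) {
    final RelNode node = call.rel(0);
    HiveRulesRegistry registry =
        call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    // Veto the match if this rule has already visited the operator
    return registry == null || !registry.getVisited(this).contains(node);
  }

  @Override
  public void onMatch(RelOptRuleCall call) {
    final RelNode node = call.rel(0);
    HiveRulesRegistry registry =
        call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    if (registry != null) {
      registry.registerVisited(this, node); // remember the operator we fired on
    }
    // ... build the replacement node, registerVisited(this, newNode) as well,
    // then call.transformTo(newNode)
  }
}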
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java
deleted file mode 100644
index 0e559e0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.optimizer.calcite;
-
-import org.apache.calcite.plan.Context;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
-
-
-public class HiveConfigContext implements Context {
- private HiveAlgorithmsConf config;
-
- public HiveConfigContext(HiveAlgorithmsConf config) {
- this.config = config;
- }
-
- public <T> T unwrap(Class<T> clazz) {
- if (clazz.isInstance(config)) {
- return clazz.cast(config);
- }
- return null;
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java
new file mode 100644
index 0000000..ad79aee
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.Context;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
+
+
+public class HiveHepPlannerContext implements Context {
+ private HiveRulesRegistry registry;
+
+ public HiveHepPlannerContext(HiveRulesRegistry registry) {
+ this.registry = registry;
+ }
+
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz.isInstance(registry)) {
+ return clazz.cast(registry);
+ }
+ return null;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java
new file mode 100644
index 0000000..8859fc2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.Context;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
+
+
+public class HiveVolcanoPlannerContext implements Context {
+ private HiveAlgorithmsConf config;
+
+ public HiveVolcanoPlannerContext(HiveAlgorithmsConf config) {
+ this.config = config;
+ }
+
+ public <T> T unwrap(Class<T> clazz) {
+ if (clazz.isInstance(config)) {
+ return clazz.cast(config);
+ }
+ return null;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
index a39ded2..8610edc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
@@ -22,7 +22,7 @@ import org.apache.calcite.plan.ConventionTraitDef;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.volcano.VolcanoPlanner;
import org.apache.calcite.rel.RelCollationTraitDef;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext;
/**
* Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive.
@@ -35,11 +35,11 @@ public class HiveVolcanoPlanner extends VolcanoPlanner {
private static final boolean ENABLE_COLLATION_TRAIT = true;
/** Creates a HiveVolcanoPlanner. */
- public HiveVolcanoPlanner(HiveConfigContext conf) {
+ public HiveVolcanoPlanner(HiveVolcanoPlannerContext conf) {
super(HiveCost.FACTORY, conf);
}
- public static RelOptPlanner createPlanner(HiveConfigContext conf) {
+ public static RelOptPlanner createPlanner(HiveVolcanoPlannerContext conf) {
final VolcanoPlanner planner = new HiveVolcanoPlanner(conf);
planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
if (ENABLE_COLLATION_TRAIT) {
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index 3e2311c..349c7f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -76,14 +76,38 @@ public class HivePreFilteringRule extends RelOptRule {
this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY;
}
- public void onMatch(RelOptRuleCall call) {
+ @Override
+ public boolean matches(RelOptRuleCall call) {
final Filter filter = call.rel(0);
final RelNode filterChild = call.rel(1);
- // 0. If the filter is already on top of a TableScan,
- // we can bail out
+ // If the filter is already on top of a TableScan,
+ // we can bail out
if (filterChild instanceof TableScan) {
- return;
+ return false;
+ }
+
+ HiveRulesRegistry registry = call.getPlanner().
+ getContext().unwrap(HiveRulesRegistry.class);
+
+ // If this operator has been visited already by the rule,
+ // we do not need to apply the optimization
+ if (registry != null && registry.getVisited(this).contains(filter)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final Filter filter = call.rel(0);
+
+ // 0. Register that we have visited this operator in this rule
+ HiveRulesRegistry registry = call.getPlanner().
+ getContext().unwrap(HiveRulesRegistry.class);
+ if (registry != null) {
+ registry.registerVisited(this, filter);
}
final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
@@ -114,7 +138,7 @@ public class HivePreFilteringRule extends RelOptRule {
}
// 3. If the new conjuncts are already present in the plan, we bail out
- final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter);
+ final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput());
final List<RexNode> newConjuncts = new ArrayList<>();
for (RexNode commonOperand : commonOperands) {
boolean found = false;
@@ -137,9 +161,15 @@ public class HivePreFilteringRule extends RelOptRule {
RexUtil.composeConjunction(rexBuilder, newConjuncts, false));
// 5. We create the new filter that might be pushed down
- RelNode newFilter = filterFactory.createFilter(filterChild, newCondition);
+ RelNode newFilter = filterFactory.createFilter(filter.getInput(), newCondition);
RelNode newTopFilter = filterFactory.createFilter(newFilter, condition);
+ // 6. We register both so we do not fire the rule on them again
+ if (registry != null) {
+ registry.registerVisited(this, newFilter);
+ registry.registerVisited(this, newTopFilter);
+ }
+
call.transformTo(newTopFilter);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java
new file mode 100644
index 0000000..18a065e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.rel.RelNode;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+
+public class HiveRulesRegistry {
+
+ private SetMultimap<RelOptRule, RelNode> registry;
+
+ public HiveRulesRegistry() {
+ this.registry = HashMultimap.create();
+ }
+
+ public void registerVisited(RelOptRule rule, RelNode operator) {
+ this.registry.put(rule, operator);
+ }
+
+ public Set<RelNode> getVisited(RelOptRule rule) {
+ return this.registry.get(rule);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e68b385..61ee2bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -63,7 +63,6 @@ import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
-import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
@@ -118,9 +117,10 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveHepPlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
@@ -151,6 +151,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -841,7 +842,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
final Double maxMemory = (double) HiveConf.getLongVar(
conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
- HiveConfigContext confContext = new HiveConfigContext(algorithmsConf);
+ HiveVolcanoPlannerContext confContext = new HiveVolcanoPlannerContext(algorithmsConf);
RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
final RelOptQuery query = new RelOptQuery(planner);
final RexBuilder rexBuilder = cluster.getRexBuilder();
@@ -1061,7 +1062,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
programBuilder.addRuleInstance(r);
}
- HepPlanner planner = new HepPlanner(programBuilder.build());
+ HiveRulesRegistry registry = new HiveRulesRegistry();
+ HiveHepPlannerContext context = new HiveHepPlannerContext(registry);
+ HepPlanner planner = new HepPlanner(programBuilder.build(), context);
List<RelMetadataProvider> list = Lists.newArrayList();
list.add(mdProvider);
planner.registerMetadataProviders(list);
http://git-wip-us.apache.org/repos/asf/hive/blob/5201f188/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
new file mode 100644
index 0000000..f1d8d1d
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.plan.hep.HepMatchOrder;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
+import org.apache.calcite.rel.AbstractRelNode;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rel.type.RelRecordType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+public class TestCBORuleFiredOnlyOnce {
+
+
+ @Test
+ public void testRuleFiredOnlyOnce() {
+
+ HiveConf conf = new HiveConf();
+
+ // Create HepPlanner
+ HepProgramBuilder programBuilder = new HepProgramBuilder();
+ programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN);
+ programBuilder = programBuilder.addRuleCollection(
+ ImmutableList.<RelOptRule>of(DummyRule.INSTANCE));
+
+ // Create rules registry to not trigger a rule more than once
+ HiveRulesRegistry registry = new HiveRulesRegistry();
+ HiveHepPlannerContext context = new HiveHepPlannerContext(registry);
+ HepPlanner planner = new HepPlanner(programBuilder.build(), context);
+
+ // Cluster
+ RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
+ RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
+
+ // Create MD provider
+ HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
+ List<RelMetadataProvider> list = Lists.newArrayList();
+ list.add(mdProvider.getMetadataProvider());
+ planner.registerMetadataProviders(list);
+ RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+
+ final RelNode node = new DummyNode(cluster, cluster.traitSet());
+
+ node.getCluster().setMetadataProvider(
+ new CachingRelMetadataProvider(chainedProvider, planner));
+
+ planner.setRoot(node);
+
+ planner.findBestExp();
+
+ // Matches 3 times: 2 times the original node, 1 time the new node created by the rule
+ assertEquals(3, DummyRule.INSTANCE.numberMatches);
+ // It is fired only once: on the original node
+ assertEquals(1, DummyRule.INSTANCE.numberOnMatch);
+ }
+
+ public static class DummyRule extends RelOptRule {
+
+ public static final DummyRule INSTANCE =
+ new DummyRule();
+
+ public int numberMatches;
+ public int numberOnMatch;
+
+ private DummyRule() {
+ super(operand(RelNode.class, any()));
+ numberMatches = 0;
+ numberOnMatch = 0;
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final RelNode node = call.rel(0);
+
+ numberMatches++;
+
+ HiveRulesRegistry registry = call.getPlanner().
+ getContext().unwrap(HiveRulesRegistry.class);
+
+ // If this operator has been visited already by the rule,
+ // we do not need to apply the optimization
+ if (registry != null && registry.getVisited(this).contains(node)) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final RelNode node = call.rel(0);
+
+ numberOnMatch++;
+
+ // If we have fired it already once, we return and the test will fail
+ if (numberOnMatch > 1) {
+ return;
+ }
+
+ // Register that we have visited this operator in this rule
+ HiveRulesRegistry registry = call.getPlanner().
+ getContext().unwrap(HiveRulesRegistry.class);
+ if (registry != null) {
+ registry.registerVisited(this, node);
+ }
+
+ // We create a new op if it is the first time we fire the rule
+ final RelNode newNode = new DummyNode(node.getCluster(), node.getTraitSet());
+ // We register it so we do not fire the rule on it again
+ if (registry != null) {
+ registry.registerVisited(this, newNode);
+ }
+
+ call.transformTo(newNode);
+
+ }
+ }
+
+ public static class DummyNode extends AbstractRelNode {
+
+ protected DummyNode(RelOptCluster cluster, RelTraitSet traits) {
+ super(cluster, cluster.traitSet());
+ }
+
+ @Override
+ protected RelDataType deriveRowType() {
+ return new RelRecordType(Lists.<RelDataTypeField>newArrayList());
+ }
+ }
+
+
+}
[04/10] hive git commit: HIVE-12032: Add unit test for HIVE-9855 (Wei Zheng via Jason Dere)
Posted by se...@apache.org.
HIVE-12032: Add unit test for HIVE-9855 (Wei Zheng via Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/04febfd6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/04febfd6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/04febfd6
Branch: refs/heads/llap
Commit: 04febfd625286058066c1b57b62d278b7fb51d6f
Parents: cc2adc7
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri Oct 9 10:39:50 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri Oct 9 10:39:50 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../clientpositive/skewjoin_onesideskew.q | 22 ++
.../clientpositive/skewjoin_onesideskew.q.out | 212 +++++++++++++++++++
3 files changed, 235 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9c9f4cc..ad47fac 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -42,6 +42,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
schemeAuthority2.q,\
scriptfile1.q,\
scriptfile1_win.q,\
+ skewjoin_onesideskew.q,\
stats_counter.q,\
stats_counter_partitioned.q,\
table_nonprintable.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
new file mode 100644
index 0000000..371f05c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
@@ -0,0 +1,22 @@
+set hive.auto.convert.join=false;
+set hive.optimize.skewjoin=true;
+set hive.skewjoin.key=2;
+
+
+DROP TABLE IF EXISTS skewtable;
+CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE;
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+
+DROP TABLE IF EXISTS nonskewtable;
+CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE;
+INSERT INTO TABLE nonskewtable VALUES ("1", "val_1");
+INSERT INTO TABLE nonskewtable VALUES ("2", "val_2");
+
+EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key;
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key;
+
+SELECT * FROM result;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out b/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
new file mode 100644
index 0000000..f8cde9b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
@@ -0,0 +1,212 @@
+PREHOOK: query: DROP TABLE IF EXISTS skewtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skewtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skewtable
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: DROP TABLE IF EXISTS nonskewtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS nonskewtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nonskewtable
+PREHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("1", "val_1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("1", "val_1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@nonskewtable
+POSTHOOK: Lineage: nonskewtable.key SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: nonskewtable.value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("2", "val_2")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("2", "val_2")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@nonskewtable
+POSTHOOK: Lineage: nonskewtable.key SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: nonskewtable.value SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0
+ Stage-6
+ Stage-4 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-7
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ handleSkewJoin: true
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.result
+
+ Stage: Stage-5
+ Conditional Operator
+
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ 1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ 1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 reducesinkkey0 (type: string)
+ 1 reducesinkkey0 (type: string)
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 reducesinkkey0 (type: string)
+ 1 reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.result
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-7
+ Create Table Operator:
+ Create Table
+ columns: key string, value string
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.result
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@nonskewtable
+PREHOOK: Input: default@skewtable
+PREHOOK: Output: database:default
+PREHOOK: Output: default@result
+POSTHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@nonskewtable
+POSTHOOK: Input: default@skewtable
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@result
+PREHOOK: query: SELECT * FROM result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@result
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@result
+#### A masked pattern was here ####
[05/10] hive git commit: HIVE-12038 : Fix the 'overall' section in the HiveQA report (Szehon, reviewed by Sergio Pena)
Posted by se...@apache.org.
HIVE-12038 : Fix the 'overall' section in the HiveQA report (Szehon, reviewed by Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/86f7af66
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/86f7af66
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/86f7af66
Branch: refs/heads/llap
Commit: 86f7af66f03a5b467709a9845d07430b2d729d28
Parents: 04febfd
Author: Szehon Ho <sz...@cloudera.com>
Authored: Fri Oct 9 16:54:10 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Fri Oct 9 16:54:10 2015 -0700
----------------------------------------------------------------------
.../hive/ptest/execution/JIRAService.java | 96 +++++++++++---------
.../hive/ptest/execution/TestJIRAService.java | 89 +++++++++++++++++-
...RAService.testErrorWithMessages.approved.txt | 20 ++++
...ervice.testErrorWithoutMessages.approved.txt | 14 +++
.../TestJIRAService.testFailAdd.approved.txt | 21 +++++
.../TestJIRAService.testFailNoAdd.approved.txt | 21 +++++
.../TestJIRAService.testSuccessAdd.approved.txt | 16 ++++
...estJIRAService.testSuccessNoAdd.approved.txt | 16 ++++
.../resources/test-configuration.properties | 2 +
9 files changed, 249 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
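Structurally, the patch splits the old postComment() (which assembled the comment text and POSTed it to JIRA in one method) into a pure generateComments(), now pinned down by the ApprovalTests fixtures added below, and a side-effecting publishComments(). A condensed sketch of the resulting flow, with method names as in the diff and bodies abbreviated:

void postComment(boolean error, int numTestsExecuted, SortedSet<String> failedTests,
    List<String> messages, Set<String> addedTests) {
  // Pure string assembly, verifiable against the *.approved.txt fixtures
  String comments = generateComments(error, numTestsExecuted, failedTests,
      messages, addedTests);
  // HTTP POST to <jiraUrl>/rest/api/2/issue/<issue>/comment
  publishComments(comments);
}

// In TestJIRAService, each scenario is then approval-tested in one line:
Approvals.verify(jiraService.generateComments(true, 0, failedTests, messages, addedTests));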
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JIRAService.java
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JIRAService.java b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JIRAService.java
index 37127ea..03a6321 100644
--- a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JIRAService.java
+++ b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/JIRAService.java
@@ -101,37 +101,37 @@ class JIRAService {
void postComment(boolean error, int numTestsExecuted, SortedSet<String> failedTests,
List<String> messages, Set<String> addedTests) {
- DefaultHttpClient httpClient = new DefaultHttpClient();
- try {
- BuildInfo buildInfo = formatBuildTag(mBuildTag);
- String buildTagForLogs = formatBuildTagForLogs(mBuildTag);
- List<String> comments = Lists.newArrayList();
- comments.add("");
- comments.add("");
- if (!failedTests.isEmpty()) {
- comments.add("{color:red}Overall{color}: -1 at least one tests failed");
- } else if (numTestsExecuted == 0) {
- comments.add("{color:red}Overall{color}: -1 no tests executed");
- } else if (error) {
- comments.add("{color:red}Overall{color}: -1 build exited with an error");
- } else {
- comments.add("{color:green}Overall{color}: +1 all checks pass");
- }
- comments.add("");
- if (!mPatch.isEmpty()) {
- comments.add("Here are the results of testing the latest attachment:");
- comments.add(mPatch);
- }
- comments.add("");
+ String comments = generateComments(error, numTestsExecuted, failedTests, messages, addedTests);
+ publishComments(comments);
+ }
+
+ @VisibleForTesting
+ String generateComments(boolean error, int numTestsExecuted, SortedSet<String> failedTests,
+ List<String> messages, Set<String> addedTests) {
+ BuildInfo buildInfo = formatBuildTag(mBuildTag);
+ String buildTagForLogs = formatBuildTagForLogs(mBuildTag);
+ List<String> comments = Lists.newArrayList();
+ comments.add("");
+ comments.add("");
+ if (!mPatch.isEmpty()) {
+ comments.add("Here are the results of testing the latest attachment:");
+ comments.add(mPatch);
+ }
+ comments.add("");
+ if (error) {
+ comments.add(formatError("-1 due to build exiting with an error"));
+ } else {
if (addedTests.size() > 0) {
comments.add(formatSuccess("+1 due to " + addedTests.size() + " test(s) being added or modified."));
} else {
comments.add(formatError("-1 due to no test(s) being added or modified."));
}
comments.add("");
- if (numTestsExecuted > 0) {
+ if (numTestsExecuted == 0) {
+ comments.add(formatError("-1 due to no tests executed"));
+ } else {
if (failedTests.isEmpty()) {
- comments.add(formatSuccess("+1 " + numTestsExecuted + " tests passed"));
+ comments.add(formatSuccess("+1 due to " + numTestsExecuted + " tests passed"));
} else {
comments.add(formatError("-1 due to " + failedTests.size()
+ " failed/errored test(s), " + numTestsExecuted + " tests executed"));
@@ -140,28 +140,34 @@ class JIRAService {
comments.addAll(failedTests);
comments.add("{noformat}");
}
- comments.add("");
- }
- comments.add("Test results: " + mJenkinsURL + "/" +
- buildInfo.getFormattedBuildTag() + "/testReport");
- comments.add("Console output: " + mJenkinsURL + "/" +
- buildInfo.getFormattedBuildTag() + "/console");
- comments.add("Test logs: " + mLogsURL + buildTagForLogs);
- comments.add("");
- if (!messages.isEmpty()) {
- comments.add("Messages:");
- comments.add("{noformat}");
- comments.addAll(trimMessages(messages));
- comments.add("{noformat}");
- comments.add("");
}
- comments.add("This message is automatically generated.");
- String attachmentId = parseAttachementId(mPatch);
+ }
+ comments.add("");
+ comments.add("Test results: " + mJenkinsURL + "/" +
+ buildInfo.getFormattedBuildTag() + "/testReport");
+ comments.add("Console output: " + mJenkinsURL + "/" +
+ buildInfo.getFormattedBuildTag() + "/console");
+ comments.add("Test logs: " + mLogsURL + buildTagForLogs);
+ comments.add("");
+ if (!messages.isEmpty()) {
+ comments.add("Messages:");
+ comments.add("{noformat}");
+ comments.addAll(trimMessages(messages));
+ comments.add("{noformat}");
comments.add("");
- comments.add("ATTACHMENT ID: " + attachmentId +
- " - " + buildInfo.getBuildName());
- mLogger.info("Comment: " + Joiner.on("\n").join(comments));
- String body = Joiner.on("\n").join(comments);
+ }
+ comments.add("This message is automatically generated.");
+ String attachmentId = parseAttachementId(mPatch);
+ comments.add("");
+ comments.add("ATTACHMENT ID: " + attachmentId +
+ " - " + buildInfo.getBuildName());
+ mLogger.info("Comment: " + Joiner.on("\n").join(comments));
+ return Joiner.on("\n").join(comments);
+ }
+
+ void publishComments(String comments) {
+ DefaultHttpClient httpClient = new DefaultHttpClient();
+ try {
String url = String.format("%s/rest/api/2/issue/%s/comment", mUrl, mName);
URL apiURL = new URL(mUrl);
httpClient.getCredentialsProvider()
@@ -174,7 +180,7 @@ class JIRAService {
httpClient.addRequestInterceptor(new PreemptiveAuth(), 0);
HttpPost request = new HttpPost(url);
ObjectMapper mapper = new ObjectMapper();
- StringEntity params = new StringEntity(mapper.writeValueAsString(new Body(body)));
+ StringEntity params = new StringEntity(mapper.writeValueAsString(new Body(comments)));
request.addHeader("Content-Type", "application/json");
request.setEntity(params);
HttpResponse httpResponse = httpClient.execute(request, localcontext);
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.java
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.java b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.java
index 2ce1dc9..b97b890 100644
--- a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.java
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.java
@@ -20,13 +20,42 @@ package org.apache.hive.ptest.execution;
import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import com.google.common.io.Resources;
import junit.framework.Assert;
import org.apache.hive.ptest.execution.JIRAService.BuildInfo;
+import org.apache.hive.ptest.execution.conf.TestConfiguration;
+import org.approvaltests.Approvals;
+import org.approvaltests.reporters.JunitReporter;
+import org.approvaltests.reporters.UseReporter;
+import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@UseReporter(JunitReporter.class)
+public class TestJIRAService extends AbstractTestPhase {
+
+ TestConfiguration conf;
+ JIRAService jiraService;
+
+ @Before
+ public void setup() throws Exception {
+ initialize(getClass().getSimpleName());
+ conf = TestConfiguration.fromInputStream(
+ Resources.getResource("test-configuration.properties").openStream(), logger);
+ conf.setPatch("https://HIVE-10000.patch");
+ jiraService = new JIRAService(logger, conf, "tag-10");
+ }
-public class TestJIRAService {
@Test
public void testFormatBuildTagPositive() throws Throwable {
@@ -75,4 +104,62 @@ public class TestJIRAService {
expected.add(0, JIRAService.TRIMMED_MESSAGE);
Assert.assertEquals(expected, JIRAService.trimMessages(messages));
}
+
+ @Test
+ public void testErrorWithMessages() throws Exception {
+ SortedSet<String> failedTests = new TreeSet<String>();
+ List<String> messages = new ArrayList<String>();
+ messages.add("Error message 1");
+ messages.add("Error message 2");
+ Set<String> addedTests = new HashSet<String>();
+ Approvals.verify(jiraService.generateComments(true, 0, failedTests, messages, addedTests));
+ }
+
+ @Test
+ public void testErrorWithoutMessages() throws Exception {
+ SortedSet<String> failedTests = new TreeSet<String>();
+ List<String> messages = new ArrayList<String>();
+ Set<String> addedTests = new HashSet<String>();
+ Approvals.verify(jiraService.generateComments(true, 0, failedTests, messages, addedTests));
+ }
+
+ @Test
+ public void testFailNoAdd() throws Exception {
+ SortedSet<String> failedTests = new TreeSet<String>();
+ failedTests.add("FailedTest1");
+ failedTests.add("FailedTest2");
+ List<String> messages = new ArrayList<String>();
+ Set<String> addedTests = new HashSet<String>();
+ Approvals.verify(jiraService.generateComments(false, 5, failedTests, messages, addedTests));
+ }
+
+ @Test
+ public void testFailAdd() throws Exception {
+ SortedSet<String> failedTests = new TreeSet<String>();
+ failedTests.add("FailedTest1");
+ failedTests.add("FailedTest2");
+ List<String> messages = new ArrayList<String>();
+ Set<String> addedTests = new HashSet<String>();
+ addedTests.add("AddedTest1");
+ addedTests.add("AddedTest2");
+ Approvals.verify(jiraService.generateComments(false, 5, failedTests, messages, addedTests));
+ }
+
+ @Test
+ public void testSuccessNoAdd() throws Exception {
+ SortedSet<String> failedTests = new TreeSet<String>();
+ List<String> messages = new ArrayList<String>();
+ Set<String> addedTests = new HashSet<String>();
+ Approvals.verify(jiraService.generateComments(false, 5, failedTests, messages, addedTests));
+ }
+
+ @Test
+ public void testSuccessAdd() throws Exception {
+ SortedSet<String> failedTests = new TreeSet<String>();
+ List<String> messages = new ArrayList<String>();
+ Set<String> addedTests = new HashSet<String>();
+ addedTests.add("AddedTest1");
+ addedTests.add("AddedTest2");
+ Approvals.verify(jiraService.generateComments(false, 5, failedTests, messages, addedTests));
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithMessages.approved.txt
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithMessages.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithMessages.approved.txt
new file mode 100644
index 0000000..546a5f4
--- /dev/null
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithMessages.approved.txt
@@ -0,0 +1,20 @@
+
+
+Here are the results of testing the latest attachment:
+https://HIVE-10000.patch
+
+{color:red}ERROR:{color} -1 due to build exiting with an error
+
+Test results: https://builds.apache.org/job/tag/10/testReport
+Console output: https://builds.apache.org/job/tag/10/console
+Test logs: http://builds.apache.org/logs/tag-10/
+
+Messages:
+{noformat}
+Error message 1
+Error message 2
+{noformat}
+
+This message is automatically generated.
+
+ATTACHMENT ID: - tag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithoutMessages.approved.txt
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithoutMessages.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithoutMessages.approved.txt
new file mode 100644
index 0000000..ec42030
--- /dev/null
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testErrorWithoutMessages.approved.txt
@@ -0,0 +1,14 @@
+
+
+Here are the results of testing the latest attachment:
+https://HIVE-10000.patch
+
+{color:red}ERROR:{color} -1 due to build exiting with an error
+
+Test results: https://builds.apache.org/job/tag/10/testReport
+Console output: https://builds.apache.org/job/tag/10/console
+Test logs: http://builds.apache.org/logs/tag-10/
+
+This message is automatically generated.
+
+ATTACHMENT ID: - tag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailAdd.approved.txt
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailAdd.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailAdd.approved.txt
new file mode 100644
index 0000000..07ee6b4
--- /dev/null
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailAdd.approved.txt
@@ -0,0 +1,21 @@
+
+
+Here are the results of testing the latest attachment:
+https://HIVE-10000.patch
+
+{color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified.
+
+{color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 5 tests executed
+*Failed tests:*
+{noformat}
+FailedTest1
+FailedTest2
+{noformat}
+
+Test results: https://builds.apache.org/job/tag/10/testReport
+Console output: https://builds.apache.org/job/tag/10/console
+Test logs: http://builds.apache.org/logs/tag-10/
+
+This message is automatically generated.
+
+ATTACHMENT ID: - tag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailNoAdd.approved.txt
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailNoAdd.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailNoAdd.approved.txt
new file mode 100644
index 0000000..1f86cfc
--- /dev/null
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testFailNoAdd.approved.txt
@@ -0,0 +1,21 @@
+
+
+Here are the results of testing the latest attachment:
+https://HIVE-10000.patch
+
+{color:red}ERROR:{color} -1 due to no test(s) being added or modified.
+
+{color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 5 tests executed
+*Failed tests:*
+{noformat}
+FailedTest1
+FailedTest2
+{noformat}
+
+Test results: https://builds.apache.org/job/tag/10/testReport
+Console output: https://builds.apache.org/job/tag/10/console
+Test logs: http://builds.apache.org/logs/tag-10/
+
+This message is automatically generated.
+
+ATTACHMENT ID: - tag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessAdd.approved.txt
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessAdd.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessAdd.approved.txt
new file mode 100644
index 0000000..bc8839c
--- /dev/null
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessAdd.approved.txt
@@ -0,0 +1,16 @@
+
+
+Here are the results of testing the latest attachment:
+https://HIVE-10000.patch
+
+{color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified.
+
+{color:green}SUCCESS:{color} +1 due to 5 tests passed
+
+Test results: https://builds.apache.org/job/tag/10/testReport
+Console output: https://builds.apache.org/job/tag/10/console
+Test logs: http://builds.apache.org/logs/tag-10/
+
+This message is automatically generated.
+
+ATTACHMENT ID: - tag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessNoAdd.approved.txt
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessNoAdd.approved.txt b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessNoAdd.approved.txt
new file mode 100644
index 0000000..8ea4d37
--- /dev/null
+++ b/testutils/ptest2/src/test/java/org/apache/hive/ptest/execution/TestJIRAService.testSuccessNoAdd.approved.txt
@@ -0,0 +1,16 @@
+
+
+Here are the results of testing the latest attachment:
+https://HIVE-10000.patch
+
+{color:red}ERROR:{color} -1 due to no test(s) being added or modified.
+
+{color:green}SUCCESS:{color} +1 due to 5 tests passed
+
+Test results: https://builds.apache.org/job/tag/10/testReport
+Console output: https://builds.apache.org/job/tag/10/console
+Test logs: http://builds.apache.org/logs/tag-10/
+
+This message is automatically generated.
+
+ATTACHMENT ID: - tag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/86f7af66/testutils/ptest2/src/test/resources/test-configuration.properties
----------------------------------------------------------------------
diff --git a/testutils/ptest2/src/test/resources/test-configuration.properties b/testutils/ptest2/src/test/resources/test-configuration.properties
index caba9ea..73a683b 100644
--- a/testutils/ptest2/src/test/resources/test-configuration.properties
+++ b/testutils/ptest2/src/test/resources/test-configuration.properties
@@ -6,6 +6,8 @@ host.localhost.host = localhost
host.localhost.threads = 2
host.localhost.localDirs = /home/hiveptest
+logsURL = http://builds.apache.org/logs/
+
workingDirectory = /tmp/hive-ptest-units/working/dir
profileDirectory = /etc/hiveptest/conf
[02/10] hive git commit: HIVE-12021: HivePreFilteringRule may introduce wrong common operands (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Posted by se...@apache.org.
HIVE-12021: HivePreFilteringRule may introduce wrong common operands (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be05e32e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be05e32e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be05e32e
Branch: refs/heads/llap
Commit: be05e32e58f54c0185551cb61d9769d2ceb5bbdd
Parents: 5201f18
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sun Oct 4 11:40:30 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Oct 9 09:04:49 2015 +0100
----------------------------------------------------------------------
.../calcite/rules/HivePreFilteringRule.java | 58 ++++++++------
.../clientpositive/filter_cond_pushdown.q | 5 ++
.../clientpositive/filter_cond_pushdown.q.out | 80 ++++++++++++++++++++
3 files changed, 121 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
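The bug, in short: when factoring operands common to every branch of an OR out of a filter condition, the old code kept any candidate that appeared in some branch, so it could pull up a predicate that not all branches implied. The fix tracks which column references occur in each branch, intersects those sets across branches, and bails out (returning an empty factor list) as soon as the intersection is empty or an unrecognized construct is seen. A sketch of the intersection step, simplified to operate on precomputed per-operand reference sets rather than RexNodes; the variable names follow the diff:

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import com.google.common.collect.Sets;

// A reference survives only if it appears in *every* OR operand.
// Assumes at least one operand (a flattened OR always has two or more).
List<String> commonRefs(List<Set<String>> refsPerOperand) {
  Set<String> refsInAllOperands = null;
  for (int i = 0; i < refsPerOperand.size(); i++) {
    Set<String> refsInCurrentOperand = refsPerOperand.get(i);
    refsInAllOperands = (i == 0)
        ? refsInCurrentOperand
        : Sets.intersection(refsInAllOperands, refsInCurrentOperand);
    if (refsInAllOperands.isEmpty()) {
      return new ArrayList<>(); // no common factor: bail out, as the patch does
    }
  }
  return new ArrayList<>(refsInAllOperands);
}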
http://git-wip-us.apache.org/repos/asf/hive/blob/be05e32e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index 349c7f8..5824127 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -47,6 +47,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
public class HivePreFilteringRule extends RelOptRule {
@@ -178,54 +179,67 @@ public class HivePreFilteringRule extends RelOptRule {
assert condition.getKind() == SqlKind.OR;
Multimap<String,RexNode> reductionCondition = LinkedHashMultimap.create();
+ // Data structure to track whether a certain reference is present in every operand
+ Set<String> refsInAllOperands = null;
+
// 1. We extract the information necessary to create the predicate for the new
// filter; currently we support comparison functions, in and between
ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
- for (RexNode operand: operands) {
+ for (int i = 0; i < operands.size(); i++) {
+ final RexNode operand = operands.get(i);
+
final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand);
final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
- boolean addedToReductionCondition = false; // Flag to control whether we have added a new factor
- // to the reduction predicate
+
+ Set<String> refsInCurrentOperand = Sets.newHashSet();
for (RexNode conjunction: conjunctions) {
+ // We do not know what it is, so we bail out for safety
if (!(conjunction instanceof RexCall)) {
- continue;
+ return new ArrayList<>();
}
RexCall conjCall = (RexCall) conjunction;
+ RexNode ref = null;
if(COMPARISON.contains(conjCall.getOperator().getKind())) {
if (conjCall.operands.get(0) instanceof RexInputRef &&
conjCall.operands.get(1) instanceof RexLiteral) {
- reductionCondition.put(conjCall.operands.get(0).toString(),
- conjCall);
- addedToReductionCondition = true;
+ ref = conjCall.operands.get(0);
} else if (conjCall.operands.get(1) instanceof RexInputRef &&
conjCall.operands.get(0) instanceof RexLiteral) {
- reductionCondition.put(conjCall.operands.get(1).toString(),
- conjCall);
- addedToReductionCondition = true;
+ ref = conjCall.operands.get(1);
+ } else {
+ // We do not know what it is, so we bail out for safety
+ return new ArrayList<>();
}
} else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) {
- reductionCondition.put(conjCall.operands.get(0).toString(),
- conjCall);
- addedToReductionCondition = true;
+ ref = conjCall.operands.get(0);
} else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
- reductionCondition.put(conjCall.operands.get(1).toString(),
- conjCall);
- addedToReductionCondition = true;
+ ref = conjCall.operands.get(1);
+ } else {
+ // We do not know what it is, so we bail out for safety
+ return new ArrayList<>();
}
+
+ String stringRef = ref.toString();
+ reductionCondition.put(stringRef, conjCall);
+ refsInCurrentOperand.add(stringRef);
}
- // If we did not add any factor, we can bail out
- if (!addedToReductionCondition) {
+ // Update the set of references that are present in every operand seen so far
+ if (i == 0) {
+ refsInAllOperands = refsInCurrentOperand;
+ } else {
+ refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand);
+ }
+ // If we did not add any factor or there are no common factors, we can bail out
+ if (refsInAllOperands.isEmpty()) {
return new ArrayList<>();
}
}
// 2. We gather the common factors and return them
List<RexNode> commonOperands = new ArrayList<>();
- for (Entry<String,Collection<RexNode>> pair : reductionCondition.asMap().entrySet()) {
- if (pair.getValue().size() == operands.size()) {
- commonOperands.add(RexUtil.composeDisjunction(rexBuilder, pair.getValue(), false));
- }
+ for (String ref : refsInAllOperands) {
+ commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false));
}
return commonOperands;
}
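The essence of the fix, as a self-contained sketch: a factor may be pulled above the OR only if its column reference occurs in every disjunct, which is exactly the running set intersection the patch maintains. Plain strings stand in for RexNode references here; none of these names are Hive API.

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class CommonFactorSketch {
  static Set<String> commonRefs(List<Set<String>> refsPerOperand) {
    Set<String> common = null;
    for (Set<String> refs : refsPerOperand) {
      if (common == null) {
        common = new HashSet<String>(refs);      // first disjunct seeds the set
      } else {
        common.retainAll(refs);                  // same effect as Sets.intersection(...)
      }
      if (common.isEmpty()) {
        return Collections.emptySet();           // bail out early: nothing can be extracted
      }
    }
    return common == null ? Collections.<String>emptySet() : common;
  }

  public static void main(String[] args) {
    // Mirrors the new test query below: the first disjunct references f.value
    // and m.value, the second only m.value, so only m.value survives.
    Set<String> first = new HashSet<String>(Arrays.asList("f.value", "m.value"));
    Set<String> second = new HashSet<String>(Collections.singletonList("m.value"));
    System.out.println(commonRefs(Arrays.asList(first, second))); // [m.value]
  }
}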
http://git-wip-us.apache.org/repos/asf/hive/blob/be05e32e/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/filter_cond_pushdown.q b/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
index 5e23b71..2425706 100644
--- a/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
+++ b/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
@@ -17,3 +17,8 @@ JOIN (
JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int
WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR
((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key;
+
+EXPLAIN
+SELECT f.key, f.value, m.value
+FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='')
+WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08');
http://git-wip-us.apache.org/repos/asf/hive/blob/be05e32e/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index af42d5c..99eb3f7 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -380,3 +380,83 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: EXPLAIN
+SELECT f.key, f.value, m.value
+FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='')
+WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, f.value, m.value
+FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='')
+WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: f
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((((value = '2008-04-10') or (value = '2008-04-08')) and value is not null) and (value <> '')) and key is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean)
+ Statistics: Num rows: 171 Data size: 1816 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 171 Data size: 1816 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 171 Data size: 1816 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
[09/10] hive git commit: HIVE-12046: Re-create spark client if
connection is dropped (Jimmy, reviewed by Xuefu)
Posted by se...@apache.org.
HIVE-12046: Re-create spark client if connection is dropped (Jimmy, reviewed by Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/09f5e843
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/09f5e843
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/09f5e843
Branch: refs/heads/llap
Commit: 09f5e8436890043135a44ae9ef84625a53ec63ec
Parents: ec8c793
Author: Jimmy Xiang <jx...@cloudera.com>
Authored: Mon Oct 5 14:52:31 2015 -0700
Committer: Jimmy Xiang <jx...@cloudera.com>
Committed: Mon Oct 12 08:43:30 2015 -0700
----------------------------------------------------------------------
.../ql/exec/spark/RemoteHiveSparkClient.java | 22 ++++++++++++++++++++
.../apache/hive/spark/client/SparkClient.java | 5 +++++
.../hive/spark/client/SparkClientImpl.java | 5 +++++
.../org/apache/hive/spark/client/rpc/Rpc.java | 4 ++++
4 files changed, 36 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/09f5e843/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
index 7d43160..2e8d1d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/RemoteHiveSparkClient.java
@@ -75,6 +75,7 @@ public class RemoteHiveSparkClient implements HiveSparkClient {
private static final long MAX_PREWARM_TIME = 30000; // 30s
private static final transient Splitter CSV_SPLITTER = Splitter.on(",").omitEmptyStrings();
+ private transient Map<String, String> conf;
private transient SparkClient remoteClient;
private transient SparkConf sparkConf;
private transient HiveConf hiveConf;
@@ -89,6 +90,11 @@ public class RemoteHiveSparkClient implements HiveSparkClient {
sparkClientTimtout = hiveConf.getTimeVar(HiveConf.ConfVars.SPARK_CLIENT_FUTURE_TIMEOUT,
TimeUnit.SECONDS);
sparkConf = HiveSparkClientFactory.generateSparkConf(conf);
+ this.conf = conf;
+ createRemoteClient();
+ }
+
+ private void createRemoteClient() throws Exception {
remoteClient = SparkClientFactory.createClient(conf, hiveConf);
if (HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_PREWARM_ENABLED) &&
@@ -155,6 +161,20 @@ public class RemoteHiveSparkClient implements HiveSparkClient {
@Override
public SparkJobRef execute(final DriverContext driverContext, final SparkWork sparkWork) throws Exception {
+ if (hiveConf.get("spark.master").startsWith("yarn-") && !remoteClient.isActive()) {
+ // Re-create the remote client if not active any more
+ close();
+ createRemoteClient();
+ }
+
+ try {
+ return submit(driverContext, sparkWork);
+ } catch (Throwable cause) {
+ throw new Exception("Failed to submit Spark work, please retry later", cause);
+ }
+ }
+
+ private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork) throws Exception {
final Context ctx = driverContext.getCtx();
final HiveConf hiveConf = (HiveConf) ctx.getConf();
refreshLocalResources(sparkWork, hiveConf);
@@ -246,6 +266,8 @@ public class RemoteHiveSparkClient implements HiveSparkClient {
if (remoteClient != null) {
remoteClient.stop();
}
+ localFiles.clear();
+ localJars.clear();
}
private static class JobStatusJob implements Job<Serializable> {
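The reconnect pattern added to execute(), reduced to a standalone sketch; Client and the factory are hypothetical stand-ins for SparkClient and SparkClientFactory.createClient, not the real signatures.

import java.util.concurrent.Callable;

public class ReconnectSketch {
  interface Client {
    boolean isActive();
    void stop();
    String submit(String work) throws Exception;
  }

  private Client client;
  private final Callable<Client> factory; // stands in for SparkClientFactory.createClient(...)

  ReconnectSketch(Callable<Client> factory) throws Exception {
    this.factory = factory;
    this.client = factory.call();
  }

  String execute(String work) throws Exception {
    if (!client.isActive()) {    // e.g. the RPC channel died after a YARN RM restart
      client.stop();             // release the stale client first
      client = factory.call();   // then rebuild before submitting
    }
    try {
      return client.submit(work);
    } catch (Throwable cause) {
      throw new Exception("Failed to submit Spark work, please retry later", cause);
    }
  }
}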
http://git-wip-us.apache.org/repos/asf/hive/blob/09f5e843/spark-client/src/main/java/org/apache/hive/spark/client/SparkClient.java
----------------------------------------------------------------------
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClient.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClient.java
index 13c2dbc..3e921a5 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClient.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClient.java
@@ -94,4 +94,9 @@ public interface SparkClient extends Serializable {
* Get default parallelism. For standalone mode, this can be used to get total number of cores.
*/
Future<Integer> getDefaultParallelism();
+
+ /**
+ * Check if remote context is still active.
+ */
+ boolean isActive();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/09f5e843/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
----------------------------------------------------------------------
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
index 2546a46..ceebbb3 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java
@@ -178,6 +178,11 @@ class SparkClientImpl implements SparkClient {
return run(new GetDefaultParallelismJob());
}
+ @Override
+ public boolean isActive() {
+ return isAlive && driverRpc.isActive();
+ }
+
void cancel(String jobId) {
protocol.cancel(jobId);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/09f5e843/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
----------------------------------------------------------------------
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
index 6d0b7cc..b2f133b 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/rpc/Rpc.java
@@ -259,6 +259,10 @@ public class Rpc implements Closeable {
return call(msg, Void.class);
}
+ public boolean isActive() {
+ return channel.isActive();
+ }
+
/**
* Send an RPC call to the remote endpoint and returns a future that can be used to monitor the
* operation.
[07/10] hive git commit: HIVE-12025 refactor bucketId generating code
(Eugene Koifman, reviewed by Prashanth Jayachandran, Sergey Shelukhin,
Elliot West)
Posted by se...@apache.org.
HIVE-12025 refactor bucketId generating code (Eugene Koifman, reviewed by Prashanth Jayachandran, Sergey Shelukhin, Elliot West)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ba83fd7b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ba83fd7b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ba83fd7b
Branch: refs/heads/llap
Commit: ba83fd7bffde4b6be8c03768a0b421c7b93f3ab1
Parents: 6edb2c2
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Sat Oct 10 10:08:46 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Sat Oct 10 10:08:46 2015 -0700
----------------------------------------------------------------------
.../mutate/worker/BucketIdResolverImpl.java | 16 ++++---------
.../mutate/worker/TestBucketIdResolverImpl.java | 2 +-
.../hadoop/hive/ql/exec/FileSinkOperator.java | 9 ++++---
.../hadoop/hive/ql/exec/ReduceSinkOperator.java | 23 ++++++++----------
.../hive/ql/io/DefaultHivePartitioner.java | 3 ++-
.../hive/ql/udf/generic/GenericUDFHash.java | 11 ++++-----
.../hive/ql/lockmgr/TestDbTxnManager.java | 6 +++--
.../objectinspector/ObjectInspectorUtils.java | 13 ++++++----
.../TestObjectInspectorUtils.java | 25 ++++++++++++++++++++
9 files changed, 64 insertions(+), 44 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
index dbed9e1..bb9462d 100644
--- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/worker/BucketIdResolverImpl.java
@@ -56,21 +56,15 @@ public class BucketIdResolverImpl implements BucketIdResolver {
return record;
}
- /** Based on: {@link org.apache.hadoop.hive.ql.exec.ReduceSinkOperator#computeBucketNumber(Object, int)}. */
@Override
public int computeBucketId(Object record) {
- int bucketId = 1;
-
+ Object[] bucketFieldValues = new Object[bucketFields.length];
+ ObjectInspector[] bucketFieldInspectors = new ObjectInspector[bucketFields.length];
for (int columnIndex = 0; columnIndex < bucketFields.length; columnIndex++) {
- Object columnValue = structObjectInspector.getStructFieldData(record, bucketFields[columnIndex]);
- bucketId = bucketId * 31 + ObjectInspectorUtils.hashCode(columnValue, bucketFields[columnIndex].getFieldObjectInspector());
- }
-
- if (bucketId < 0) {
- bucketId = -1 * bucketId;
+ bucketFieldValues[columnIndex] = structObjectInspector.getStructFieldData(record, bucketFields[columnIndex]);
+ bucketFieldInspectors[columnIndex] = bucketFields[columnIndex].getFieldObjectInspector();
}
-
- return bucketId % totalBuckets;
+ return ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketFieldInspectors, totalBuckets);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
index f81373e..5297c5d 100644
--- a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/worker/TestBucketIdResolverImpl.java
@@ -23,7 +23,7 @@ public class TestBucketIdResolverImpl {
public void testAttachBucketIdToRecord() {
MutableRecord record = new MutableRecord(1, "hello");
capturingBucketIdResolver.attachBucketIdToRecord(record);
- assertThat(record.rowId, is(new RecordIdentifier(-1L, 8, -1L)));
+ assertThat(record.rowId, is(new RecordIdentifier(-1L, 1, -1L)));
assertThat(record.id, is(1));
assertThat(record.msg.toString(), is("hello"));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 39944a9..e247673 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -791,12 +791,11 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
if (!multiFileSpray) {
return 0;
} else {
- int keyHashCode = 0;
- for (int i = 0; i < partitionEval.length; i++) {
- Object o = partitionEval[i].evaluate(row);
- keyHashCode = keyHashCode * 31
- + ObjectInspectorUtils.hashCode(o, partitionObjectInspectors[i]);
+ Object[] bucketFieldValues = new Object[partitionEval.length];
+ for(int i = 0; i < partitionEval.length; i++) {
+ bucketFieldValues[i] = partitionEval[i].evaluate(row);
}
+ int keyHashCode = ObjectInspectorUtils.getBucketHashCode(bucketFieldValues, partitionObjectInspectors);
key.setHashCode(keyHashCode);
int bucketNum = prtner.getBucket(key, null, totalFiles);
return bucketMap.get(bucketNum);
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
index f1df608..dd08210 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
@@ -405,27 +405,24 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
}
private int computeBucketNumber(Object row, int numBuckets) throws HiveException {
- int buckNum = 0;
-
if (conf.getWriteType() == AcidUtils.Operation.UPDATE ||
conf.getWriteType() == AcidUtils.Operation.DELETE) {
- // We don't need to evalute the hash code. Instead read the bucket number directly from
+ // We don't need to evaluate the hash code. Instead read the bucket number directly from
// the row. I don't need to evaluate any expressions as I know I am reading the ROW__ID
// column directly.
Object recIdValue = acidRowInspector.getStructFieldData(row, recIdField);
- buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField));
+ int buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField));
if (isLogTraceEnabled) {
LOG.trace("Acid choosing bucket number " + buckNum);
}
+ return buckNum;
} else {
+ Object[] bucketFieldValues = new Object[bucketEval.length];
for (int i = 0; i < bucketEval.length; i++) {
- Object o = bucketEval[i].evaluate(row);
- buckNum = buckNum * 31 + ObjectInspectorUtils.hashCode(o, bucketObjectInspectors[i]);
+ bucketFieldValues[i] = bucketEval[i].evaluate(row);
}
+ return ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketObjectInspectors, numBuckets);
}
-
- // similar to hive's default partitioner, refer DefaultHivePartitioner
- return (buckNum & Integer.MAX_VALUE) % numBuckets;
}
private void populateCachedDistributionKeys(Object row, int index) throws HiveException {
@@ -476,11 +473,11 @@ public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
keyHashCode = 1;
}
} else {
- for (int i = 0; i < partitionEval.length; i++) {
- Object o = partitionEval[i].evaluate(row);
- keyHashCode = keyHashCode * 31
- + ObjectInspectorUtils.hashCode(o, partitionObjectInspectors[i]);
+ Object[] bucketFieldValues = new Object[partitionEval.length];
+ for(int i = 0; i < partitionEval.length; i++) {
+ bucketFieldValues[i] = partitionEval[i].evaluate(row);
}
+ keyHashCode = ObjectInspectorUtils.getBucketHashCode(bucketFieldValues, partitionObjectInspectors);
}
int hashCode = buckNum < 0 ? keyHashCode : keyHashCode * 31 + buckNum;
if (isLogTraceEnabled) {
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/ql/src/java/org/apache/hadoop/hive/ql/io/DefaultHivePartitioner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/DefaultHivePartitioner.java b/ql/src/java/org/apache/hadoop/hive/ql/io/DefaultHivePartitioner.java
index 6a91cb8..6a14fb8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/DefaultHivePartitioner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/DefaultHivePartitioner.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.io;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.mapred.lib.HashPartitioner;
/** Partition keys by their {@link Object#hashCode()}. */
@@ -26,7 +27,7 @@ public class DefaultHivePartitioner<K2, V2> extends HashPartitioner<K2, V2> impl
/** Use {@link Object#hashCode()} to partition. */
@Override
public int getBucket(K2 key, V2 value, int numBuckets) {
- return (key.hashCode() & Integer.MAX_VALUE) % numBuckets;
+ return ObjectInspectorUtils.getBucketNumber(key.hashCode(), numBuckets);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java
index 474f404..fd1fe92 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFHash.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.udf.generic;
-import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -45,13 +44,11 @@ public class GenericUDFHash extends GenericUDF {
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
- // See
- // http://java.sun.com/j2se/1.5.0/docs/api/java/util/List.html#hashCode()
- int r = 0;
- for (int i = 0; i < arguments.length; i++) {
- r = r * 31
- + ObjectInspectorUtils.hashCode(arguments[i].get(), argumentOIs[i]);
+ Object[] fieldValues = new Object[arguments.length];
+ for(int i = 0; i < arguments.length; i++) {
+ fieldValues[i] = arguments[i].get();
}
+ int r = ObjectInspectorUtils.getBucketHashCode(fieldValues, argumentOIs);
result.set(r);
return result;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
index 8a53ec5..68c6542 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
@@ -20,13 +20,15 @@ package org.apache.hadoop.hive.ql.lockmgr;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
+import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
+import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.metadata.DummyPartition;import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.txn.AcidHouseKeeperService;
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 54ae48e..09e9108 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -502,18 +502,23 @@ public final class ObjectInspectorUtils {
* @return the bucket number
*/
public static int getBucketNumber(Object[] bucketFields, ObjectInspector[] bucketFieldInspectors, int totalBuckets) {
- int hashCode = getBucketHashCode(bucketFields, bucketFieldInspectors);
- int bucketID = (hashCode & Integer.MAX_VALUE) % totalBuckets;
- return bucketID;
+ return getBucketNumber(getBucketHashCode(bucketFields, bucketFieldInspectors), totalBuckets);
}
/**
+ * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL+BucketedTables
+ * @param hashCode as produced by {@link #getBucketHashCode(Object[], ObjectInspector[])}
+ */
+ public static int getBucketNumber(int hashCode, int numberOfBuckets) {
+ return (hashCode & Integer.MAX_VALUE) % numberOfBuckets;
+ }
+ /**
* Computes the hash code for the given bucketed fields
* @param bucketFields
* @param bucketFieldInspectors
* @return
*/
- private static int getBucketHashCode(Object[] bucketFields, ObjectInspector[] bucketFieldInspectors) {
+ public static int getBucketHashCode(Object[] bucketFields, ObjectInspector[] bucketFieldInspectors) {
int hashCode = 0;
for (int i = 0; i < bucketFields.length; i++) {
int fieldHash = ObjectInspectorUtils.hashCode(bucketFields[i], bucketFieldInspectors[i]);
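The now-centralized arithmetic, as a self-contained sketch. Plain Object.hashCode() stands in for the inspector-aware ObjectInspectorUtils.hashCode, so the concrete numbers will differ from the Hive test values; the two-step shape is the point.

public class BucketIdSketch {
  // Same shape as getBucketHashCode: a rolling 31-based hash over the bucketed fields.
  static int getBucketHashCode(Object[] bucketFields) {
    int hashCode = 0;
    for (Object field : bucketFields) {
      hashCode = 31 * hashCode + (field == null ? 0 : field.hashCode());
    }
    return hashCode;
  }

  // Same shape as getBucketNumber(int, int): mask the sign bit so the modulo
  // can never go negative, then wrap into the bucket range.
  static int getBucketNumber(int hashCode, int numberOfBuckets) {
    return (hashCode & Integer.MAX_VALUE) % numberOfBuckets;
  }

  public static void main(String[] args) {
    Object[] row = {1, "two", true};
    System.out.println(getBucketNumber(getBucketHashCode(row), 16)); // a bucket id in [0, 16)
  }
}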
http://git-wip-us.apache.org/repos/asf/hive/blob/ba83fd7b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
index ade0ef7..cf73b28 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
@@ -131,4 +131,29 @@ public class TestObjectInspectorUtils extends TestCase {
}
}
+ public void testBucketIdGeneration() {
+ ArrayList<String> fieldNames = new ArrayList<String>();
+ fieldNames.add("firstInteger");
+ fieldNames.add("secondString");
+ fieldNames.add("thirdBoolean");
+ ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>();
+ fieldObjectInspectors
+ .add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
+ fieldObjectInspectors
+ .add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+ fieldObjectInspectors
+ .add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
+
+ StandardStructObjectInspector soi1 = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
+ ArrayList<Object> struct = new ArrayList<Object>(3);
+ struct.add(1);
+ struct.add("two");
+ struct.add(true);
+
+ int hashCode = ObjectInspectorUtils.getBucketHashCode(struct.toArray(), fieldObjectInspectors.toArray(new ObjectInspector[fieldObjectInspectors.size()]));
+ assertEquals("", 3574518, hashCode);
+ int bucketId = ObjectInspectorUtils.getBucketNumber(struct.toArray(), fieldObjectInspectors.toArray(new ObjectInspector[fieldObjectInspectors.size()]), 16);
+ assertEquals("", 6, bucketId);
+ assertEquals("", bucketId, ObjectInspectorUtils.getBucketNumber(hashCode, 16));
+ }
}
[08/10] hive git commit: HIVE-12003 Hive Streaming API : Add check to
ensure table is transactional (Roshan Naik via Eugene Koifman)
Posted by se...@apache.org.
HIVE-12003 Hive Streaming API : Add check to ensure table is transactional (Roshan Naik via Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ec8c793c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ec8c793c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ec8c793c
Branch: refs/heads/llap
Commit: ec8c793c3bfc6edafada2329939553e5cd6cb0f3
Parents: ba83fd7
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Sat Oct 10 10:11:48 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Sat Oct 10 10:11:48 2015 -0700
----------------------------------------------------------------------
.../hive/hcatalog/streaming/HiveEndPoint.java | 21 ++++++++++
.../hive/hcatalog/streaming/InvalidTable.java | 8 ++++
.../hive/hcatalog/streaming/TestStreaming.java | 41 +++++++++++++++++++-
.../hive/ql/txn/compactor/TestCompactor.java | 13 ++++---
4 files changed, 76 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ec8c793c/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
index 7e99008..5de3f1d 100644
--- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java
@@ -49,6 +49,7 @@ import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.Map;
/**
* Information about the hive end point (i.e. table or partition) to write to.
@@ -272,11 +273,31 @@ public class HiveEndPoint {
}
this.secureMode = ugi==null ? false : ugi.hasKerberosCredentials();
this.msClient = getMetaStoreClient(endPoint, conf, secureMode);
+ checkEndPoint(endPoint, msClient);
if (createPart && !endPoint.partitionVals.isEmpty()) {
createPartitionIfNotExists(endPoint, msClient, conf);
}
}
+ private void checkEndPoint(HiveEndPoint endPoint, IMetaStoreClient msClient) throws InvalidTable {
+ // 1 - check if TBLPROPERTIES ('transactional'='true') is set on table
+ try {
+ Table t = msClient.getTable(endPoint.database, endPoint.table);
+ Map<String, String> params = t.getParameters();
+ if(params != null) {
+ String transactionalProp = params.get("transactional");
+ if (transactionalProp != null && transactionalProp.equalsIgnoreCase("true")) {
+ return;
+ }
+ }
+ LOG.error("'transactional' property is not set on Table " + endPoint);
+ throw new InvalidTable(endPoint.database, endPoint.table, "'transactional' property is not set on Table");
+ } catch (Exception e) {
+ LOG.warn("Unable to check if Table is transactional. " + endPoint, e);
+ throw new InvalidTable(endPoint.database, endPoint.table, e);
+ }
+ }
+
/**
* Close connection
*/
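Stripped of the metastore plumbing, the new validation is a case-insensitive property check; a minimal sketch over the map returned by Table#getParameters():

import java.util.Collections;
import java.util.Map;

public class TransactionalCheckSketch {
  static boolean isTransactional(Map<String, String> tableParams) {
    return tableParams != null
        && "true".equalsIgnoreCase(tableParams.get("transactional"));
  }

  public static void main(String[] args) {
    System.out.println(isTransactional(Collections.singletonMap("transactional", "TRUE"))); // true
    System.out.println(isTransactional(Collections.<String, String>emptyMap())); // false
  }
}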
http://git-wip-us.apache.org/repos/asf/hive/blob/ec8c793c/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/InvalidTable.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/InvalidTable.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/InvalidTable.java
index 903c37e..98ef688 100644
--- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/InvalidTable.java
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/InvalidTable.java
@@ -27,4 +27,12 @@ public class InvalidTable extends StreamingException {
public InvalidTable(String db, String table) {
super(makeMsg(db,table), null);
}
+
+ public InvalidTable(String db, String table, String msg) {
+ super(msg);
+ }
+
+ public InvalidTable(String db, String table, Exception inner) {
+ super(inner.getMessage(), inner);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ec8c793c/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
index 340ab6c..d9a7eae 100644
--- a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
@@ -388,6 +388,44 @@ public class TestStreaming {
}
+ @Test
+ public void testTableValidation() throws Exception {
+ int bucketCount = 100;
+
+ String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
+ String tbl1 = "validation1";
+ String tbl2 = "validation2";
+
+ String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'";
+ String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'";
+
+ runDDL(driver, "create database testBucketing3");
+ runDDL(driver, "use testBucketing3");
+
+ runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into "
+ + bucketCount + " buckets stored as orc location " + tableLoc) ;
+
+ runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into "
+ + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')") ;
+
+
+ try {
+ HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation1", null);
+ endPt.newConnection(false);
+ Assert.assertTrue("InvalidTable exception was not thrown", false);
+ } catch (InvalidTable e) {
+ // expecting this exception
+ }
+ try {
+ HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "validation2", null);
+ endPt.newConnection(false);
+ Assert.assertTrue("InvalidTable exception was not thrown", false);
+ } catch (InvalidTable e) {
+ // expecting this exception
+ }
+ }
+
+
private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles,
String... records) throws Exception {
ValidTxnList txns = msClient.getValidTxns();
@@ -1191,7 +1229,8 @@ public class TestStreaming {
" clustered by ( " + join(bucketCols, ",") + " )" +
" into " + bucketCount + " buckets " +
" stored as orc " +
- " location '" + tableLoc + "'";
+ " location '" + tableLoc + "'" +
+ " TBLPROPERTIES ('transactional'='true') ";
runDDL(driver, crtTbl);
if(partNames!=null && partNames.length!=0) {
return addPartition(driver, tableName, partVals, partNames);
http://git-wip-us.apache.org/repos/asf/hive/blob/ec8c793c/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index abca1ce..e2910dd 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -154,11 +154,12 @@ public class TestCompactor {
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
" PARTITIONED BY(bkt INT)" +
" CLUSTERED BY(a) INTO 4 BUCKETS" + //currently ACID requires table to be bucketed
- " STORED AS ORC", driver);
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("CREATE EXTERNAL TABLE " + tblNameStg + "(a INT, b STRING)" +
" ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' LINES TERMINATED BY '\\n'" +
" STORED AS TEXTFILE" +
- " LOCATION '" + stagingFolder.newFolder().toURI().getPath() + "'", driver);
+ " LOCATION '" + stagingFolder.newFolder().toURI().getPath() + "'" +
+ " TBLPROPERTIES ('transactional'='true')", driver);
executeStatementOnDriver("load data local inpath '" + BASIC_FILE_NAME +
"' overwrite into table " + tblNameStg, driver);
@@ -411,7 +412,7 @@ public class TestCompactor {
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
" CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
- " STORED AS ORC", driver);
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"a","b"},",", endPt);
@@ -468,7 +469,7 @@ public class TestCompactor {
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
" CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
- " STORED AS ORC", driver);
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true') ", driver);
HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"a","b"},",", endPt);
@@ -516,7 +517,7 @@ public class TestCompactor {
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
" CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
- " STORED AS ORC", driver);
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"a","b"},",", endPt);
@@ -576,7 +577,7 @@ public class TestCompactor {
executeStatementOnDriver("drop table if exists " + tblName, driver);
executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
" CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
- " STORED AS ORC", driver);
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
DelimitedInputWriter writer = new DelimitedInputWriter(new String[] {"a","b"},",", endPt);
[10/10] hive git commit: HIVE-12096 : LLAP: merge master into branch
(Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-12096 : LLAP: merge master into branch (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4e53bfd1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4e53bfd1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4e53bfd1
Branch: refs/heads/llap
Commit: 4e53bfd10e1d6bd2a24d112d2af452ac49d8b16f
Parents: 1d9574a 09f5e84
Author: Sergey Shelukhin <se...@apache.org>
Authored: Mon Oct 12 10:47:39 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Mon Oct 12 10:47:39 2015 -0700
----------------------------------------------------------------------
.../apache/hive/hcatalog/data/JsonSerDe.java | 8 +-
.../hive/hcatalog/data/TestJsonSerDe.java | 36 ++++
.../hive/hcatalog/streaming/HiveEndPoint.java | 21 ++
.../hive/hcatalog/streaming/InvalidTable.java | 8 +
.../mutate/worker/BucketIdResolverImpl.java | 16 +-
.../hive/hcatalog/streaming/TestStreaming.java | 68 +++++-
.../mutate/worker/TestBucketIdResolverImpl.java | 2 +-
.../hive/ql/txn/compactor/TestCompactor.java | 13 +-
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/metastore/txn/TxnHandler.java | 4 +
.../hadoop/hive/ql/exec/FileSinkOperator.java | 9 +-
.../hadoop/hive/ql/exec/ReduceSinkOperator.java | 23 +-
.../ql/exec/spark/RemoteHiveSparkClient.java | 22 ++
.../hive/ql/io/DefaultHivePartitioner.java | 3 +-
.../hadoop/hive/ql/lockmgr/DbTxnManager.java | 34 ++-
.../ql/optimizer/calcite/HiveConfigContext.java | 37 ----
.../calcite/HiveHepPlannerContext.java | 37 ++++
.../calcite/HiveVolcanoPlannerContext.java | 37 ++++
.../calcite/cost/HiveVolcanoPlanner.java | 6 +-
.../calcite/rules/HivePreFilteringRule.java | 100 ++++++---
.../calcite/rules/HiveRulesRegistry.java | 44 ++++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 11 +-
.../hive/ql/txn/compactor/CompactorMR.java | 8 +-
.../hive/ql/udf/generic/GenericUDFHash.java | 11 +-
.../hive/ql/lockmgr/TestDbTxnManager.java | 8 +-
.../calcite/TestCBORuleFiredOnlyOnce.java | 168 +++++++++++++++
.../clientpositive/filter_cond_pushdown.q | 5 +
.../test/queries/clientpositive/json_serde1.q | 36 ++++
.../clientpositive/skewjoin_onesideskew.q | 22 ++
.../clientpositive/filter_cond_pushdown.q.out | 80 +++++++
.../results/clientpositive/json_serde1.q.out | 113 ++++++++++
.../clientpositive/skewjoin_onesideskew.q.out | 212 +++++++++++++++++++
.../objectinspector/ObjectInspectorUtils.java | 13 +-
.../TestObjectInspectorUtils.java | 25 +++
.../apache/hive/spark/client/SparkClient.java | 5 +
.../hive/spark/client/SparkClientImpl.java | 5 +
.../org/apache/hive/spark/client/rpc/Rpc.java | 4 +
.../hive/ptest/execution/JIRAService.java | 96 +++++----
.../hive/ptest/execution/TestJIRAService.java | 89 +++++++-
...RAService.testErrorWithMessages.approved.txt | 20 ++
...ervice.testErrorWithoutMessages.approved.txt | 14 ++
.../TestJIRAService.testFailAdd.approved.txt | 21 ++
.../TestJIRAService.testFailNoAdd.approved.txt | 21 ++
.../TestJIRAService.testSuccessAdd.approved.txt | 16 ++
...estJIRAService.testSuccessNoAdd.approved.txt | 16 ++
.../resources/test-configuration.properties | 2 +
46 files changed, 1358 insertions(+), 192 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4e53bfd1/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4e53bfd1/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4e53bfd1/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
----------------------------------------------------------------------
[06/10] hive git commit: HIVE-11914 When a transaction gets a
heartbeat, it doesn't update the lock heartbeat. (Eugene Koifman,
reviewed by Alan Gates)
Posted by se...@apache.org.
HIVE-11914 When a transaction gets a heartbeat, it doesn't update the lock heartbeat. (Eugene Koifman, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6edb2c2f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6edb2c2f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6edb2c2f
Branch: refs/heads/llap
Commit: 6edb2c2f5b22ab84ee0e4150d0982f81c39a5ccc
Parents: 86f7af6
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Sat Oct 10 10:05:16 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Sat Oct 10 10:05:16 2015 -0700
----------------------------------------------------------------------
.../hive/hcatalog/streaming/TestStreaming.java | 27 ++++++++++++++++
.../hadoop/hive/metastore/txn/TxnHandler.java | 4 +++
.../hadoop/hive/ql/lockmgr/DbTxnManager.java | 34 +++++++++++++-------
.../hive/ql/txn/compactor/CompactorMR.java | 8 ++---
.../hive/ql/lockmgr/TestDbTxnManager.java | 2 +-
5 files changed, 59 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6edb2c2f/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
----------------------------------------------------------------------
diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
index 2f6baec..340ab6c 100644
--- a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
+++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
@@ -31,6 +31,8 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
+import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
@@ -553,6 +555,31 @@ public class TestStreaming {
txnBatch.close();
connection.close();
}
+
+ @Test
+ public void testHeartbeat() throws Exception {
+ HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
+ DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2,",", endPt);
+ StreamingConnection connection = endPt.newConnection(false, null);
+
+ TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
+ txnBatch.beginNextTransaction();
+ //todo: this should ideally check Transaction heartbeat as well, but heartbeat
+ //timestamp is not reported yet
+ //GetOpenTxnsInfoResponse txnresp = msClient.showTxns();
+ ShowLocksResponse response = msClient.showLocks();
+ Assert.assertEquals("Wrong nubmer of locks: " + response, 1, response.getLocks().size());
+ ShowLocksResponseElement lock = response.getLocks().get(0);
+ long acquiredAt = lock.getAcquiredat();
+ long heartbeatAt = lock.getLastheartbeat();
+ txnBatch.heartbeat();
+ response = msClient.showLocks();
+ Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size());
+ lock = response.getLocks().get(0);
+ Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat());
+ Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() +
+ ") > old heartbeat(" + heartbeatAt +")", lock.getLastheartbeat() > heartbeatAt);
+ }
@Test
public void testTransactionBatchEmptyAbort() throws Exception {
// 1) to partitioned table
http://git-wip-us.apache.org/repos/asf/hive/blob/6edb2c2f/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 6218a03..ca485fa 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -1742,6 +1742,10 @@ public class TxnHandler {
dbConn.rollback();
throw new NoSuchTxnException("No such txn: " + txnid);
}
+ //update locks for this txn to the same heartbeat
+ s = "update HIVE_LOCKS set hl_last_heartbeat = " + now + " where hl_txnid = " + txnid;
+ LOG.debug("Going to execute update <" + s + ">");
+ stmt.executeUpdate(s);
LOG.debug("Going to commit");
dbConn.commit();
} finally {
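What the fix amounts to, as a minimal JDBC sketch: the transaction row and all of its lock rows receive the same heartbeat timestamp inside one database transaction. The HIVE_LOCKS names come from the diff above; the TXNS table and column names are assumed from the metastore schema.

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

public class TxnHeartbeatSketch {
  static void heartbeat(Connection dbConn, long txnid, long now) throws SQLException {
    Statement stmt = dbConn.createStatement();
    try {
      stmt.executeUpdate("update TXNS set txn_last_heartbeat = " + now
          + " where txn_id = " + txnid);
      stmt.executeUpdate("update HIVE_LOCKS set hl_last_heartbeat = " + now
          + " where hl_txnid = " + txnid);
      dbConn.commit(); // both updates become visible atomically
    } finally {
      stmt.close();
    }
  }
}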
http://git-wip-us.apache.org/repos/asf/hive/blob/6edb2c2f/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
index 39b44e8..219a54a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
@@ -293,17 +293,28 @@ public class DbTxnManager extends HiveTxnManagerImpl {
@Override
public void heartbeat() throws LockException {
- LOG.debug("Heartbeating lock and transaction " + JavaUtils.txnIdToString(txnId));
- List<HiveLock> locks = lockMgr.getLocks(false, false);
- if (locks.size() == 0) {
- if (!isTxnOpen()) {
- // No locks, no txn, we outta here.
- return;
- } else {
- // Create one dummy lock so we can go through the loop below
- DbLockManager.DbHiveLock dummyLock = new DbLockManager.DbHiveLock(0L);
- locks.add(dummyLock);
+ List<HiveLock> locks;
+ if(isTxnOpen()) {
+ // Create one dummy lock so we can go through the loop below, though we only
+ // really need txnId
+ DbLockManager.DbHiveLock dummyLock = new DbLockManager.DbHiveLock(0L);
+ locks = new ArrayList<>(1);
+ locks.add(dummyLock);
+ }
+ else {
+ locks = lockMgr.getLocks(false, false);
+ }
+ if(LOG.isInfoEnabled()) {
+ StringBuilder sb = new StringBuilder("Sending heartbeat for ")
+ .append(JavaUtils.txnIdToString(txnId)).append(" and");
+ for(HiveLock lock : locks) {
+ sb.append(" ").append(lock.toString());
}
+ LOG.info(sb.toString());
+ }
+ if(!isTxnOpen() && locks.isEmpty()) {
+ // No locks, no txn, we outta here.
+ return;
}
for (HiveLock lock : locks) {
long lockId = ((DbLockManager.DbHiveLock)lock).lockId;
@@ -320,7 +331,8 @@ public class DbTxnManager extends HiveTxnManagerImpl {
throw new LockException(e, ErrorMsg.TXN_ABORTED, JavaUtils.txnIdToString(txnId));
} catch (TException e) {
throw new LockException(
- ErrorMsg.METASTORE_COMMUNICATION_FAILED.getMsg(), e);
+ ErrorMsg.METASTORE_COMMUNICATION_FAILED.getMsg() + "(" + JavaUtils.txnIdToString(txnId)
+ + "," + lock.toString() + ")", e);
}
}
}
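The reworked lock-selection logic, as a standalone sketch: an open transaction is always heartbeated (one dummy lock carries it through the loop), otherwise only the locks that actually exist are. Lock is a stand-in for DbLockManager.DbHiveLock.

import java.util.ArrayList;
import java.util.List;

public class HeartbeatSelectionSketch {
  static class Lock {
    final long lockId;
    Lock(long lockId) { this.lockId = lockId; }
  }

  static List<Lock> locksToHeartbeat(boolean txnOpen, List<Lock> existingLocks) {
    if (txnOpen) {
      List<Lock> locks = new ArrayList<Lock>(1);
      locks.add(new Lock(0L)); // dummy entry: only the txnId is really needed here
      return locks;
    }
    return existingLocks; // no txn: heartbeat whatever locks we hold, possibly none
  }
}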
http://git-wip-us.apache.org/repos/asf/hive/blob/6edb2c2f/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index 3ee9346..391f99a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -402,10 +402,10 @@ public class CompactorMR {
dir.getName().startsWith(AcidUtils.DELTA_PREFIX)) {
boolean sawBase = dir.getName().startsWith(AcidUtils.BASE_PREFIX);
FileStatus[] files = fs.listStatus(dir, AcidUtils.bucketFileFilter);
- for (int j = 0; j < files.length; j++) {
+ for(FileStatus f : files) {
// For each file, figure out which bucket it is.
- Matcher matcher = AcidUtils.BUCKET_DIGIT_PATTERN.matcher(files[j].getPath().getName());
- addFileToMap(matcher, files[j].getPath(), sawBase, splitToBucketMap);
+ Matcher matcher = AcidUtils.BUCKET_DIGIT_PATTERN.matcher(f.getPath().getName());
+ addFileToMap(matcher, f.getPath(), sawBase, splitToBucketMap);
}
} else {
// Legacy file, see if it's a bucket file
@@ -434,7 +434,7 @@ public class CompactorMR {
Map<Integer, BucketTracker> splitToBucketMap) {
if (!matcher.find()) {
LOG.warn("Found a non-bucket file that we thought matched the bucket pattern! " +
- file.toString());
+ file.toString() + " Matcher=" + matcher.toString());
}
int bucketNum = Integer.valueOf(matcher.group());
BucketTracker bt = splitToBucketMap.get(bucketNum);
http://git-wip-us.apache.org/repos/asf/hive/blob/6edb2c2f/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
index db119e1..8a53ec5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java
@@ -236,7 +236,7 @@ public class TestDbTxnManager {
exception = ex;
}
Assert.assertNotNull("Expected exception3", exception);
- Assert.assertEquals("Wrong Exception3", ErrorMsg.LOCK_NO_SUCH_LOCK, exception.getCanonicalErrorMsg());
+ Assert.assertEquals("Wrong Exception3", ErrorMsg.TXN_ABORTED, exception.getCanonicalErrorMsg());
}
@Test
[03/10] hive git commit: HIVE-12012: select query on json table with
map containing numeric values fails (Jason Dere, reviewed by Sushanth Sowmyan)
Posted by se...@apache.org.
HIVE-12012: select query on json table with map containing numeric values fails (Jason Dere, reviewed by Sushanth Sowmyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cc2adc73
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cc2adc73
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cc2adc73
Branch: refs/heads/llap
Commit: cc2adc732790338cdc198e7c45d463a3ee6b0e4f
Parents: be05e32
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri Oct 9 09:59:35 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri Oct 9 09:59:35 2015 -0700
----------------------------------------------------------------------
.../apache/hive/hcatalog/data/JsonSerDe.java | 8 +-
.../hive/hcatalog/data/TestJsonSerDe.java | 36 ++++++
.../test/queries/clientpositive/json_serde1.q | 36 ++++++
.../results/clientpositive/json_serde1.q.out | 113 +++++++++++++++++++
4 files changed, 186 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cc2adc73/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java
index 9b325b6..1b47b28 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java
@@ -349,13 +349,7 @@ public class JsonSerDe implements SerDe {
HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
- Object v;
- if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) {
- v = extractCurrentField(p, valueSchema, false);
- } else {
- v = extractCurrentField(p, valueSchema, true);
- }
-
+ Object v = extractCurrentField(p, valueSchema, false);
map.put(k, v);
}
val = map;
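The following standalone illustration shows the streaming pattern the fix relies on (written against Jackson 2.x here; the SerDe itself targets an older Jackson API). After the loop reads a FIELD_NAME token, the parser must be advanced onto the value token before the value is read, which is what passing false for the "token is current" flag achieves uniformly in the hunk above.

import java.util.LinkedHashMap;
import java.util.Map;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;

public class JsonMapDemo {
  public static void main(String[] args) throws Exception {
    JsonParser p = new JsonFactory().createParser("{\"x\":11, \"y\":22, \"z\":null}");
    p.nextToken();                                   // position on START_OBJECT
    Map<String, Integer> map = new LinkedHashMap<>();
    while (p.nextToken() != JsonToken.END_OBJECT) {  // now on a FIELD_NAME token
      String k = p.getCurrentName();
      JsonToken valueToken = p.nextToken();          // advance onto the value token
      Integer v = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue();
      map.put(k, v);
    }
    System.out.println(map);                         // {x=11, y=22, z=null}
  }
}

Advancing inside the extraction step, rather than treating the field-name token as current, would explain the reported symptom: reading text while still on a FIELD_NAME yields the key (so string-valued maps happened to work), while numeric getters on a FIELD_NAME token fail.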
http://git-wip-us.apache.org/repos/asf/hive/blob/cc2adc73/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java
index 618f39b..5ececb5 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java
@@ -307,4 +307,40 @@ public class TestJsonSerDe extends TestCase {
assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord)rjsd.deserialize(text2), expected2));
}
+
+ private static HashMap<String, Integer> createHashMapStringInteger(Object...vals) {
+ assertTrue(vals.length % 2 == 0);
+ HashMap<String, Integer> retval = new HashMap<String, Integer>();
+ for (int idx = 0; idx < vals.length; idx += 2) {
+ retval.put((String) vals[idx], (Integer) vals[idx+1]);
+ }
+ return retval;
+ }
+
+ public void testMapValues() throws Exception {
+ Configuration conf = new Configuration();
+ Properties props = new Properties();
+
+ props.put(serdeConstants.LIST_COLUMNS, "a,b");
+ props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>,map<string,int>");
+ JsonSerDe rjsd = new JsonSerDe();
+ SerDeUtils.initializeSerDe(rjsd, conf, props, null);
+
+ Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} ");
+ Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}");
+ Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");
+
+ HCatRecord expected1 = new DefaultHCatRecord(Arrays.<Object>asList(
+ Arrays.<String>asList("aaa"),
+ createHashMapStringInteger("bbb", 1)));
+ HCatRecord expected2 = new DefaultHCatRecord(Arrays.<Object>asList(
+ Arrays.<String>asList("yyy"),
+ createHashMapStringInteger("zzz", 123)));
+ HCatRecord expected3 = new DefaultHCatRecord(Arrays.<Object>asList(
+ Arrays.<String>asList("a"),
+ createHashMapStringInteger("x", 11, "y", 22, "z", null)));
+
+ assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord)rjsd.deserialize(text1), expected1));
+ assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord)rjsd.deserialize(text2), expected2));
+ }
}
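One observation on the test as committed: it builds text3 and expected3 (covering a null map value) but never asserts them. A hypothetical follow-up method of the same shape, reusing the class's imports and the createHashMapStringInteger helper above, might look like this sketch:

  public void testMapValueNull() throws Exception {
    Configuration conf = new Configuration();
    Properties props = new Properties();
    props.put(serdeConstants.LIST_COLUMNS, "a,b");
    props.put(serdeConstants.LIST_COLUMN_TYPES, "array<string>,map<string,int>");
    JsonSerDe rjsd = new JsonSerDe();
    SerDeUtils.initializeSerDe(rjsd, conf, props, null);

    // Same third fixture the committed test sets up but does not check.
    Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}");
    HCatRecord expected3 = new DefaultHCatRecord(Arrays.<Object>asList(
        Arrays.<String>asList("a"),
        createHashMapStringInteger("x", 11, "y", 22, "z", null)));

    assertTrue(HCatDataCheckUtil.recordsEqual((HCatRecord) rjsd.deserialize(text3), expected3));
  }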
http://git-wip-us.apache.org/repos/asf/hive/blob/cc2adc73/ql/src/test/queries/clientpositive/json_serde1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/json_serde1.q b/ql/src/test/queries/clientpositive/json_serde1.q
new file mode 100644
index 0000000..85f5af2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/json_serde1.q
@@ -0,0 +1,36 @@
+
+add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar;
+
+drop table if exists json_serde1_1;
+drop table if exists json_serde1_2;
+
+create table json_serde1_1 (a array<string>,b map<string,int>)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+insert into table json_serde1_1
+ select array('aaa'),map('aaa',1) from src limit 2;
+
+select * from json_serde1_1;
+
+create table json_serde1_2 (
+ a array<int>,
+ b map<int,date>,
+ c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
+) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+insert into table json_serde1_2
+ select
+ array(3, 2, 1),
+ map(1, date '2001-01-01', 2, null),
+ named_struct(
+ 'c1', 123456,
+ 'c2', 'hello',
+ 'c3', array('aa', 'bb', 'cc'),
+ 'c4', map('abc', 123, 'xyz', 456),
+ 'c5', named_struct('c5_1', 'bye', 'c5_2', 88))
+ from src limit 2;
+
+select * from json_serde1_2;
+
+drop table json_serde1_1;
+drop table json_serde1_2;
http://git-wip-us.apache.org/repos/asf/hive/blob/cc2adc73/ql/src/test/results/clientpositive/json_serde1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/json_serde1.q.out b/ql/src/test/results/clientpositive/json_serde1.q.out
new file mode 100644
index 0000000..6235aff
--- /dev/null
+++ b/ql/src/test/results/clientpositive/json_serde1.q.out
@@ -0,0 +1,113 @@
+PREHOOK: query: drop table if exists json_serde1_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists json_serde1_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists json_serde1_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists json_serde1_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table json_serde1_1 (a array<string>,b map<string,int>)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde1_1
+POSTHOOK: query: create table json_serde1_1 (a array<string>,b map<string,int>)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde1_1
+PREHOOK: query: insert into table json_serde1_1
+ select array('aaa'),map('aaa',1) from src limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@json_serde1_1
+POSTHOOK: query: insert into table json_serde1_1
+ select array('aaa'),map('aaa',1) from src limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@json_serde1_1
+POSTHOOK: Lineage: json_serde1_1.a EXPRESSION []
+POSTHOOK: Lineage: json_serde1_1.b EXPRESSION []
+PREHOOK: query: select * from json_serde1_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde1_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde1_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde1_1
+#### A masked pattern was here ####
+["aaa"] {"aaa":1}
+["aaa"] {"aaa":1}
+PREHOOK: query: create table json_serde1_2 (
+ a array<int>,
+ b map<int,date>,
+ c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
+) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde1_2
+POSTHOOK: query: create table json_serde1_2 (
+ a array<int>,
+ b map<int,date>,
+ c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
+) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde1_2
+PREHOOK: query: insert into table json_serde1_2
+ select
+ array(3, 2, 1),
+ map(1, date '2001-01-01', 2, null),
+ named_struct(
+ 'c1', 123456,
+ 'c2', 'hello',
+ 'c3', array('aa', 'bb', 'cc'),
+ 'c4', map('abc', 123, 'xyz', 456),
+ 'c5', named_struct('c5_1', 'bye', 'c5_2', 88))
+ from src limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@json_serde1_2
+POSTHOOK: query: insert into table json_serde1_2
+ select
+ array(3, 2, 1),
+ map(1, date '2001-01-01', 2, null),
+ named_struct(
+ 'c1', 123456,
+ 'c2', 'hello',
+ 'c3', array('aa', 'bb', 'cc'),
+ 'c4', map('abc', 123, 'xyz', 456),
+ 'c5', named_struct('c5_1', 'bye', 'c5_2', 88))
+ from src limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@json_serde1_2
+POSTHOOK: Lineage: json_serde1_2.a EXPRESSION []
+POSTHOOK: Lineage: json_serde1_2.b EXPRESSION []
+POSTHOOK: Lineage: json_serde1_2.c EXPRESSION []
+PREHOOK: query: select * from json_serde1_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde1_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde1_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde1_2
+#### A masked pattern was here ####
+[3,2,1] {1:"2001-01-01",2:null} {"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"xyz":456,"abc":123},"c5":{"c5_1":"bye","c5_2":88}}
+[3,2,1] {1:"2001-01-01",2:null} {"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"xyz":456,"abc":123},"c5":{"c5_1":"bye","c5_2":88}}
+PREHOOK: query: drop table json_serde1_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde1_1
+PREHOOK: Output: default@json_serde1_1
+POSTHOOK: query: drop table json_serde1_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde1_1
+POSTHOOK: Output: default@json_serde1_1
+PREHOOK: query: drop table json_serde1_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde1_2
+PREHOOK: Output: default@json_serde1_2
+POSTHOOK: query: drop table json_serde1_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde1_2
+POSTHOOK: Output: default@json_serde1_2