You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/07/22 18:26:24 UTC
hive git commit: HIVE-14291: count(*) on a table written by
hcatstorer returns incorrect result (Pengcheng Xiong,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master b7f3d3a04 -> 64a053b69
HIVE-14291: count(*) on a table written by hcatstorer returns incorrect result (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/64a053b6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/64a053b6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/64a053b6
Branch: refs/heads/master
Commit: 64a053b69c982ea312654b14d002f1e523603e52
Parents: b7f3d3a
Author: Pengcheng Xiong <px...@apache.org>
Authored: Fri Jul 22 11:26:00 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Fri Jul 22 11:26:00 2016 -0700
----------------------------------------------------------------------
.../mapreduce/FileOutputCommitterContainer.java | 64 ++++++++++----------
.../hive/hcatalog/mapreduce/HCatBaseTest.java | 1 +
.../hcatalog/mapreduce/HCatMapReduceTest.java | 4 +-
.../mapreduce/TestHCatDynamicPartitioned.java | 20 ++++++
.../mapreduce/TestHCatNonPartitioned.java | 21 ++++++-
5 files changed, 74 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index 9db3dc1..14f9c41 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -443,7 +444,9 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
//Copy table level hcat.* keys to the partition
for (Entry<Object, Object> entry : storer.getProperties().entrySet()) {
- params.put(entry.getKey().toString(), entry.getValue().toString());
+ if (!entry.getKey().toString().equals(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
+ params.put(entry.getKey().toString(), entry.getValue().toString());
+ }
}
return params;
}
@@ -763,49 +766,46 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
Table table = new Table(jobInfo.getTableInfo().getTable());
Path tblPath = new Path(table.getTTable().getSd().getLocation());
FileSystem fs = tblPath.getFileSystem(conf);
-
- if( table.getPartitionKeys().size() == 0 ) {
- //Move data from temp directory the actual table directory
- //No metastore operation required.
- Path src = new Path(jobInfo.getLocation());
- moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
- if (!src.equals(tblPath)){
- fs.delete(src, true);
- }
- return;
- }
-
IMetaStoreClient client = null;
HCatTableInfo tableInfo = jobInfo.getTableInfo();
List<Partition> partitionsAdded = new ArrayList<Partition>();
try {
HiveConf hiveConf = HCatUtil.getHiveConf(conf);
client = HCatUtil.getHiveMetastoreClient(hiveConf);
- StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters());
+ if (table.getPartitionKeys().size() == 0) {
+ // Move data from the temp directory to the actual table directory.
+ // No partition is registered; the metastore is only updated below to drop a stale COLUMN_STATS_ACCURATE flag.
+ Path src = new Path(jobInfo.getLocation());
+ moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
+ if (!src.equals(tblPath)) {
+ fs.delete(src, true);
+ }
+ if (table.getParameters() != null
+ && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
+ table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
+ client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
+ }
+ return;
+ }
+
+ StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),
+ table.getParameters());
FileStatus tblStat = fs.getFileStatus(tblPath);
String grpName = tblStat.getGroup();
FsPermission perms = tblStat.getPermission();
List<Partition> partitionsToAdd = new ArrayList<Partition>();
- if (!dynamicPartitioningUsed){
- partitionsToAdd.add(
- constructPartition(
- context,jobInfo,
- tblPath.toString(), null, jobInfo.getPartitionValues()
- ,jobInfo.getOutputSchema(), getStorerParameterMap(storer)
- ,table, fs
- ,grpName,perms));
- }else{
- for (Entry<String,Map<String,String>> entry : partitionsDiscoveredByPath.entrySet()){
- partitionsToAdd.add(
- constructPartition(
- context,jobInfo,
- getPartitionRootLocation(entry.getKey(),entry.getValue().size())
- ,entry.getKey(), entry.getValue()
- ,jobInfo.getOutputSchema(), getStorerParameterMap(storer)
- ,table, fs
- ,grpName,perms));
+ if (!dynamicPartitioningUsed) {
+ partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null,
+ jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer),
+ table, fs, grpName, perms));
+ } else {
+ for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
+ partitionsToAdd.add(constructPartition(context, jobInfo,
+ getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(),
+ entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table,
+ fs, grpName, perms));
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
index 823e9a9..506d3f4 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
@@ -84,6 +84,7 @@ public class HCatBaseTest {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR);
hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
+ hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
hiveConf
.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
index f437079..deee3a0 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.TableType;
@@ -55,7 +56,6 @@ import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
@@ -71,7 +71,6 @@ import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -217,6 +216,7 @@ public abstract class HCatMapReduceTest extends HCatBaseTest {
if (isTableImmutable()){
tableParams.put(hive_metastoreConstants.IS_IMMUTABLE,"true");
}
+ StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE);
tbl.setParameters(tableParams);
client.createTable(tbl);
http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
index 0d87c6c..9573098 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
@@ -185,6 +185,26 @@ public class TestHCatDynamicPartitioned extends HCatMapReduceTest {
res = new ArrayList<String>();
driver.getResults(res);
assertEquals(NUM_RECORDS, res.size());
+
+ query = "select count(*) from " + tableName;
+ retCode = driver.run(query).getResponseCode();
+ if (retCode != 0) {
+ throw new Exception("Error " + retCode + " running query " + query);
+ }
+ res = new ArrayList<String>();
+ driver.getResults(res);
+ assertEquals(1, res.size());
+ assertEquals("20", res.get(0));
+
+ query = "select count(*) from " + tableName + " where p1=1";
+ retCode = driver.run(query).getResponseCode();
+ if (retCode != 0) {
+ throw new Exception("Error " + retCode + " running query " + query);
+ }
+ res = new ArrayList<String>();
+ driver.getResults(res);
+ assertEquals(1, res.size());
+ assertEquals("4", res.get(0));
}
//TODO 1.0 miniCluster is slow this test times out, make it work
http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
index 174a92f..a73516c 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
@@ -141,10 +141,27 @@ public class TestHCatNonPartitioned extends HCatMapReduceTest {
ArrayList<String> res = new ArrayList<String>();
driver.getResults(res);
- if (isTableImmutable()){
+ if (isTableImmutable()) {
assertEquals(10, res.size());
- }else {
+ } else {
assertEquals(30, res.size());
}
+
+ query = "select count(*) from " + tableName;
+ retCode = driver.run(query).getResponseCode();
+
+ if (retCode != 0) {
+ throw new Exception("Error " + retCode + " running query " + query);
+ }
+
+ res = new ArrayList<String>();
+ driver.getResults(res);
+ if (isTableImmutable()) {
+ assertEquals(1, res.size());
+ assertEquals("10", res.get(0));
+ } else {
+ assertEquals(1, res.size());
+ assertEquals("30", res.get(0));
+ }
}
}