You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/07/22 18:26:24 UTC

hive git commit: HIVE-14291: count(*) on a table written by hcatstorer returns incorrect result (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master b7f3d3a04 -> 64a053b69


HIVE-14291: count(*) on a table written by hcatstorer returns incorrect result (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/64a053b6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/64a053b6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/64a053b6

Branch: refs/heads/master
Commit: 64a053b69c982ea312654b14d002f1e523603e52
Parents: b7f3d3a
Author: Pengcheng Xiong <px...@apache.org>
Authored: Fri Jul 22 11:26:00 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Fri Jul 22 11:26:00 2016 -0700

----------------------------------------------------------------------
 .../mapreduce/FileOutputCommitterContainer.java | 64 ++++++++++----------
 .../hive/hcatalog/mapreduce/HCatBaseTest.java   |  1 +
 .../hcatalog/mapreduce/HCatMapReduceTest.java   |  4 +-
 .../mapreduce/TestHCatDynamicPartitioned.java   | 20 ++++++
 .../mapreduce/TestHCatNonPartitioned.java       | 21 ++++++-
 5 files changed, 74 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index 9db3dc1..14f9c41 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
@@ -443,7 +444,9 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
 
     //Copy table level hcat.* keys to the partition
     for (Entry<Object, Object> entry : storer.getProperties().entrySet()) {
-      params.put(entry.getKey().toString(), entry.getValue().toString());
+      if (!entry.getKey().toString().equals(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
+        params.put(entry.getKey().toString(), entry.getValue().toString());
+      }
     }
     return params;
   }
@@ -763,49 +766,46 @@ class FileOutputCommitterContainer extends OutputCommitterContainer {
     Table table = new Table(jobInfo.getTableInfo().getTable());
     Path tblPath = new Path(table.getTTable().getSd().getLocation());
     FileSystem fs = tblPath.getFileSystem(conf);
-
-    if( table.getPartitionKeys().size() == 0 ) {
-      //Move data from temp directory the actual table directory
-      //No metastore operation required.
-      Path src = new Path(jobInfo.getLocation());
-      moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
-      if (!src.equals(tblPath)){
-        fs.delete(src, true);
-      }
-      return;
-    }
-
     IMetaStoreClient client = null;
     HCatTableInfo tableInfo = jobInfo.getTableInfo();
     List<Partition> partitionsAdded = new ArrayList<Partition>();
     try {
       HiveConf hiveConf = HCatUtil.getHiveConf(conf);
       client = HCatUtil.getHiveMetastoreClient(hiveConf);
-      StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters());
+      if (table.getPartitionKeys().size() == 0) {
+        // Move data from the temp directory to the actual table directory.
+        // No partition is added; only the COLUMN_STATS_ACCURATE table parameter is removed below.
+        Path src = new Path(jobInfo.getLocation());
+        moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
+        if (!src.equals(tblPath)) {
+          fs.delete(src, true);
+        }
+        if (table.getParameters() != null
+            && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
+          table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE);
+          client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
+        }
+        return;
+      }
+
+      StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),
+          table.getParameters());
 
       FileStatus tblStat = fs.getFileStatus(tblPath);
       String grpName = tblStat.getGroup();
       FsPermission perms = tblStat.getPermission();
 
       List<Partition> partitionsToAdd = new ArrayList<Partition>();
-      if (!dynamicPartitioningUsed){
-        partitionsToAdd.add(
-            constructPartition(
-                context,jobInfo,
-                tblPath.toString(), null, jobInfo.getPartitionValues()
-                ,jobInfo.getOutputSchema(), getStorerParameterMap(storer)
-                ,table, fs
-                ,grpName,perms));
-      }else{
-        for (Entry<String,Map<String,String>> entry : partitionsDiscoveredByPath.entrySet()){
-          partitionsToAdd.add(
-              constructPartition(
-                  context,jobInfo,
-                  getPartitionRootLocation(entry.getKey(),entry.getValue().size())
-                  ,entry.getKey(), entry.getValue()
-                  ,jobInfo.getOutputSchema(), getStorerParameterMap(storer)
-                  ,table, fs
-                  ,grpName,perms));
+      if (!dynamicPartitioningUsed) {
+        partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null,
+            jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer),
+            table, fs, grpName, perms));
+      } else {
+        for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
+          partitionsToAdd.add(constructPartition(context, jobInfo,
+              getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(),
+              entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table,
+              fs, grpName, perms));
         }
       }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
index 823e9a9..506d3f4 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java
@@ -84,6 +84,7 @@ public class HCatBaseTest {
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
     hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR);
     hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true);
     hiveConf
     .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
         "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");

http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
index f437079..deee3a0 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.TableType;
@@ -55,7 +56,6 @@ import org.apache.hadoop.mapreduce.JobStatus;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-
 import org.apache.hive.hcatalog.common.HCatConstants;
 import org.apache.hive.hcatalog.common.HCatUtil;
 import org.apache.hive.hcatalog.data.DefaultHCatRecord;
@@ -71,7 +71,6 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -217,6 +216,7 @@ public abstract class HCatMapReduceTest extends HCatBaseTest {
     if (isTableImmutable()){
       tableParams.put(hive_metastoreConstants.IS_IMMUTABLE,"true");
     }
+    StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE);
     tbl.setParameters(tableParams);
 
     client.createTable(tbl);

http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
index 0d87c6c..9573098 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java
@@ -185,6 +185,26 @@ public class TestHCatDynamicPartitioned extends HCatMapReduceTest {
     res = new ArrayList<String>();
     driver.getResults(res);
     assertEquals(NUM_RECORDS, res.size());
+
+    query = "select count(*) from " + tableName;
+    retCode = driver.run(query).getResponseCode();
+    if (retCode != 0) {
+      throw new Exception("Error " + retCode + " running query " + query);
+    }
+    res = new ArrayList<String>();
+    driver.getResults(res);
+    assertEquals(1, res.size());
+    assertEquals("20", res.get(0));
+
+    query = "select count(*) from " + tableName + " where p1=1";
+    retCode = driver.run(query).getResponseCode();
+    if (retCode != 0) {
+      throw new Exception("Error " + retCode + " running query " + query);
+    }
+    res = new ArrayList<String>();
+    driver.getResults(res);
+    assertEquals(1, res.size());
+    assertEquals("4", res.get(0));
   }
 
   //TODO 1.0 miniCluster is slow this test times out, make it work

http://git-wip-us.apache.org/repos/asf/hive/blob/64a053b6/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
----------------------------------------------------------------------
diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
index 174a92f..a73516c 100644
--- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
+++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java
@@ -141,10 +141,27 @@ public class TestHCatNonPartitioned extends HCatMapReduceTest {
 
     ArrayList<String> res = new ArrayList<String>();
     driver.getResults(res);
-    if (isTableImmutable()){
+    if (isTableImmutable()) {
       assertEquals(10, res.size());
-    }else {
+    } else {
       assertEquals(30, res.size());
     }
+
+    query = "select count(*) from " + tableName;
+    retCode = driver.run(query).getResponseCode();
+
+    if (retCode != 0) {
+      throw new Exception("Error " + retCode + " running query " + query);
+    }
+
+    res = new ArrayList<String>();
+    driver.getResults(res);
+    if (isTableImmutable()) {
+      assertEquals(1, res.size());
+      assertEquals("10", res.get(0));
+    } else {
+      assertEquals(1, res.size());
+      assertEquals("30", res.get(0));
+    }
   }
 }