You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by st...@apache.org on 2018/07/05 21:09:18 UTC
[2/2] hive git commit: HIVE-18118: Explain Extended should indicate
if a file being read is an EC file (Andrew Sherman, reviewed by Sahil Takiar)
HIVE-18118: Explain Extended should indicate if a file being read is an EC file (Andrew Sherman, reviewed by Sahil Takiar)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c2940a07
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c2940a07
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c2940a07
Branch: refs/heads/master
Commit: c2940a07cf0891e922672782b73ec22551a7eedd
Parents: e865b44
Author: Andrew Sherman <as...@cloudera.com>
Authored: Thu Jul 5 16:08:16 2018 -0500
Committer: Sahil Takiar <st...@cloudera.com>
Committed: Thu Jul 5 16:08:16 2018 -0500
----------------------------------------------------------------------
.../hive/common/util/HiveStringUtils.java | 21 +-
.../apache/hive/jdbc/TestJdbcWithMiniHS2.java | 2 +-
.../jdbc/TestJdbcWithMiniHS2ErasureCoding.java | 215 ++++++++++
.../test/resources/testconfiguration.properties | 3 +-
.../org/apache/hadoop/hive/ql/QTestUtil.java | 4 +-
.../org/apache/hive/jdbc/miniHS2/MiniHS2.java | 23 +-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 44 +-
.../ql/metadata/SessionHiveMetaStoreClient.java | 2 +-
.../formatting/MetaDataFormatUtils.java | 11 +-
.../formatting/TextMetaDataFormatter.java | 10 +
.../optimizer/spark/SparkMapJoinOptimizer.java | 2 +-
.../stats/annotation/StatsRulesProcFactory.java | 2 +-
.../hadoop/hive/ql/plan/PartitionDesc.java | 4 +-
.../apache/hadoop/hive/ql/plan/PlanUtils.java | 31 ++
.../apache/hadoop/hive/ql/plan/Statistics.java | 12 +-
.../apache/hadoop/hive/ql/plan/TableDesc.java | 3 +-
.../hive/ql/stats/BasicStatsNoJobTask.java | 7 +-
.../hadoop/hive/ql/stats/BasicStatsTask.java | 4 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 20 +-
.../hive/ql/txn/compactor/CompactorMR.java | 4 +-
.../queries/clientpositive/erasure_explain.q | 24 ++
.../queries/clientpositive/erasure_simple.q | 13 +
.../clientnegative/unset_table_property.q.out | 1 +
.../materialized_view_create_rewrite.q.out | 2 +
.../clientpositive/druid/druidmini_mv.q.out | 3 +
.../erasurecoding/erasure_explain.q.out | 409 +++++++++++++++++++
.../erasurecoding/erasure_simple.q.out | 20 +-
.../llap/materialized_view_create.q.out | 1 +
.../llap/materialized_view_create_rewrite.q.out | 2 +
...materialized_view_create_rewrite_dummy.q.out | 2 +
...erialized_view_create_rewrite_multi_db.q.out | 2 +
.../llap/materialized_view_describe.q.out | 1 +
.../clientpositive/show_tblproperties.q.out | 5 +
.../unset_table_view_property.q.out | 9 +
.../hadoop/hive/common/StatsSetupConst.java | 21 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 3 +-
.../hive/metastore/utils/MetaStoreUtils.java | 11 +-
.../metastore/utils/TestMetaStoreUtils.java | 37 +-
38 files changed, 877 insertions(+), 113 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
index cfe9b22..6b14ad9 100644
--- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
+++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
@@ -31,19 +31,15 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
-import java.util.Map;
-import java.util.HashMap;
import java.util.Locale;
-import java.util.Properties;
+import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import com.google.common.base.Splitter;
-import com.google.common.collect.Interner;
-import com.google.common.collect.Interners;
-
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
import org.apache.commons.lang3.text.translate.EntityArrays;
@@ -1062,19 +1058,6 @@ public class HiveStringUtils {
return identifier.trim().toLowerCase();
}
- public static Map getPropertiesExplain(Properties properties) {
- if (properties != null) {
- String value = properties.getProperty("columns.comments");
- if (value != null) {
- // should copy properties first
- Map clone = new HashMap(properties);
- clone.put("columns.comments", quoteComments(value));
- return clone;
- }
- }
- return properties;
- }
-
public static String quoteComments(String value) {
char[] chars = value.toCharArray();
if (!commentProvided(chars)) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index d7d7097..7ef2ced 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -1658,7 +1658,7 @@ public class TestJdbcWithMiniHS2 {
/**
* Get Detailed Table Information via jdbc
*/
- private String getDetailedTableDescription(Statement stmt, String table) throws SQLException {
+ static String getDetailedTableDescription(Statement stmt, String table) throws SQLException {
String extendedDescription = null;
try (ResultSet rs = stmt.executeQuery("describe extended " + table)) {
while (rs.next()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java
new file mode 100644
index 0000000..b0a0145
--- /dev/null
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2ErasureCoding.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.jdbc;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Collections;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.HadoopShims.HdfsErasureCodingShim;
+import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hive.jdbc.miniHS2.MiniHS2;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.apache.hadoop.hive.ql.QTestUtil.DEFAULT_TEST_EC_POLICY;
+import static org.apache.hive.jdbc.TestJdbcWithMiniHS2.getDetailedTableDescription;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Run Erasure Coding tests with JDBC.
+ */
+public class TestJdbcWithMiniHS2ErasureCoding {
+ private static final String DB_NAME = "ecTestDb";
+ private static MiniHS2 miniHS2 = null;
+ private static HiveConf conf;
+ private Connection hs2Conn = null;
+
+ private static HiveConf createHiveOnSparkConf() {
+ HiveConf hiveConf = new HiveConf();
+ // Tell dfs not to consider load when choosing a datanode as this can cause failure as
+ // in a test we do not have spare datanode capacity.
+ hiveConf.setBoolean("dfs.namenode.redundancy.considerLoad", false);
+ hiveConf.set("hive.execution.engine", "spark");
+ hiveConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
+ hiveConf.set("spark.master", "local-cluster[2,2,1024]");
+ hiveConf.set("hive.spark.client.connect.timeout", "30000ms");
+ hiveConf.set("spark.local.dir",
+ Paths.get(System.getProperty("test.tmp.dir"), "TestJdbcWithMiniHS2ErasureCoding-local-dir")
+ .toString());
+ hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); // avoid ZK errors
+ return hiveConf;
+ }
+
+ /**
+ * Set up a mini HS2 with miniMR.
+ */
+ @BeforeClass
+ public static void beforeTest() throws Exception {
+ Class.forName(MiniHS2.getJdbcDriverName());
+ conf = createHiveOnSparkConf();
+ DriverManager.setLoginTimeout(0);
+ miniHS2 = new MiniHS2.Builder()
+ .withConf(conf)
+ .withMiniMR()
+ .withDataNodes(5) // sufficient for RS-3-2-1024k
+ .build();
+ miniHS2.start(Collections.emptyMap());
+ createDb();
+ MiniDFSShim dfs = miniHS2.getDfs();
+ addErasurePolicy(dfs, "hdfs:///", DEFAULT_TEST_EC_POLICY);
+ }
+
+ /**
+ * Shutdown the mini HS2.
+ */
+ @AfterClass
+ public static void afterTest() {
+ if (miniHS2 != null && miniHS2.isStarted()) {
+ miniHS2.stop();
+ }
+ }
+
+ /**
+ * Set up a connection to the test database before each test.
+ */
+ @Before
+ public void setUp() throws Exception {
+ hs2Conn = DriverManager.getConnection(miniHS2.getJdbcURL(DB_NAME),
+ System.getProperty("user.name"), "bar");
+ }
+
+ /**
+ * Close connection after each test.
+ */
+ @After
+ public void tearDown() throws Exception {
+ if (hs2Conn != null) {
+ hs2Conn.close();
+ }
+ }
+
+ /**
+ * Create a database.
+ */
+ private static void createDb() throws Exception {
+ try (Connection conn = DriverManager.getConnection(miniHS2.getJdbcURL(),
+ System.getProperty("user.name"), "bar");
+ Statement stmt2 = conn.createStatement()) {
+ stmt2.execute("DROP DATABASE IF EXISTS " + DB_NAME + " CASCADE");
+ stmt2.execute("CREATE DATABASE " + DB_NAME);
+ }
+ }
+
+ /**
+ * Test EXPLAIN on fs with Erasure Coding.
+ */
+ @Test
+ public void testExplainErasureCoding() throws Exception {
+ try (Statement stmt = hs2Conn.createStatement()) {
+ String tableName = "pTableEc";
+ stmt.execute(
+ " CREATE TABLE " + tableName + " (userid VARCHAR(64), link STRING, source STRING) "
+ + "PARTITIONED BY (datestamp STRING, i int) "
+ + "CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET");
+ // insert data to create 2 partitions
+ stmt.execute("INSERT INTO TABLE " + tableName
+ + " PARTITION (datestamp = '2014-09-23', i = 1)(userid,link) VALUES ('jsmith', 'mail.com')");
+ stmt.execute("INSERT INTO TABLE " + tableName
+ + " PARTITION (datestamp = '2014-09-24', i = 2)(userid,link) VALUES ('mac', 'superchunk.com')");
+ String explain = getExtendedExplain(stmt, "select userid from " + tableName);
+ assertMatchAndCount(explain, " numFiles 4", 2);
+ assertMatchAndCount(explain, " numFilesErasureCoded 4", 2);
+ }
+ }
+
+ /**
+ * Test DESCRIBE on fs with Erasure Coding.
+ */
+ @Test
+ public void testDescribeErasureCoding() throws Exception {
+ try (Statement stmt = hs2Conn.createStatement()) {
+ String table = "pageviews";
+ stmt.execute(" CREATE TABLE " + table + " (userid VARCHAR(64), link STRING, source STRING) "
+ + "PARTITIONED BY (datestamp STRING, i int) CLUSTERED BY (userid) INTO 4 BUCKETS STORED AS PARQUET");
+ stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-23', i = 1)"
+ + "(userid,link) VALUES ('jsmith', 'mail.com')");
+ stmt.execute("INSERT INTO TABLE " + table + " PARTITION (datestamp = '2014-09-24', i = 1)"
+ + "(userid,link) VALUES ('dpatel', 'gmail.com')");
+ String description = getDetailedTableDescription(stmt, table);
+ assertMatchAndCount(description, "numFiles=8", 1);
+ assertMatchAndCount(description, "numFilesErasureCoded=8", 1);
+ assertMatchAndCount(description, "numPartitions=2", 1);
+ }
+ }
+
+ /**
+ * Add an Erasure Coding Policy to a Path.
+ */
+ private static void addErasurePolicy(MiniDFSShim dfs, String pathString, String policyName) throws IOException {
+ HadoopShims hadoopShims = ShimLoader.getHadoopShims();
+ HdfsErasureCodingShim erasureCodingShim = hadoopShims.createHdfsErasureCodingShim(dfs.getFileSystem(), conf);
+ erasureCodingShim.enableErasureCodingPolicy(policyName);
+ Path fsRoot = new Path(pathString);
+ erasureCodingShim.setErasureCodingPolicy(fsRoot, policyName);
+ HadoopShims.HdfsFileErasureCodingPolicy erasureCodingPolicy =
+ erasureCodingShim.getErasureCodingPolicy(fsRoot);
+ assertEquals(policyName, erasureCodingPolicy.getName());
+ }
+
+ /**
+ * Get Extended Explain output via jdbc.
+ */
+ private static String getExtendedExplain(Statement stmt, String query) throws SQLException {
+ StringBuilder sb = new StringBuilder(2048);
+ try (ResultSet rs = stmt.executeQuery("explain extended " + query)) {
+ while (rs.next()) {
+ sb.append(rs.getString(1)).append('\n');
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Check that the expected string occurs correctly in the output string.
+ * @param output string to probe
+ * @param expectedString string to find in output
+ * @param expectedCount the expected number of occurrences of the expected string
+ */
+ private void assertMatchAndCount(String output, String expectedString, int expectedCount) {
+ assertTrue("Did not find expected '" + expectedString + "' in text " +
+ output, output.contains(expectedString));
+ assertEquals("wrong count of matches of '" + expectedString + "' in text " +
+ output, expectedCount, StringUtils.countMatches(output, expectedString));
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 978a806..fedb77b 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1714,4 +1714,5 @@ druid.kafka.query.files=druidkafkamini_basic.q
erasurecoding.shared.query.files=erasure_commands.q
# tests to be run only by TestErasureCodingHDFSCliDriver
-erasurecoding.only.query.files=erasure_simple.q
+erasurecoding.only.query.files=erasure_simple.q,\
+ erasure_explain.q
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 2dfd2aa..98aae5c 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -150,9 +150,9 @@ public class QTestUtil {
public static final String TEST_HIVE_USER_PROPERTY = "test.hive.user";
/**
- * The Erasure Coding Policy to use in TestErasureCodingHDFSCliDriver.
+ * The default Erasure Coding Policy to use in Erasure Coding tests.
*/
- private static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k";
+ public static final String DEFAULT_TEST_EC_POLICY = "RS-3-2-1024k";
private String testWarehouse;
@Deprecated
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java b/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
index 1700c08..a78dd73 100644
--- a/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
+++ b/itests/util/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
@@ -66,6 +66,7 @@ public class MiniHS2 extends AbstractHiveService {
private static final FsPermission FULL_PERM = new FsPermission((short)00777);
private static final FsPermission WRITE_ALL_PERM = new FsPermission((short)00733);
private static final String tmpDir = System.getProperty("test.tmp.dir");
+ private static final int DEFAULT_DATANODE_COUNT = 4;
private HiveServer2 hiveServer2 = null;
private final File baseDir;
private final Path baseFsDir;
@@ -104,6 +105,7 @@ public class MiniHS2 extends AbstractHiveService {
private boolean isMetastoreSecure;
private String metastoreServerPrincipal;
private String metastoreServerKeyTab;
+ private int dataNodes = DEFAULT_DATANODE_COUNT; // default number of datanodes for miniHS2
public Builder() {
}
@@ -162,6 +164,16 @@ public class MiniHS2 extends AbstractHiveService {
return this;
}
+ /**
+ * Set the number of datanodes to be used by HS2.
+ * @param count the number of datanodes
+ * @return this Builder
+ */
+ public Builder withDataNodes(int count) {
+ this.dataNodes = count;
+ return this;
+ }
+
public MiniHS2 build() throws Exception {
if (miniClusterType == MiniClusterType.MR && useMiniKdc) {
throw new IOException("Can't create secure miniMr ... yet");
@@ -173,7 +185,7 @@ public class MiniHS2 extends AbstractHiveService {
}
return new MiniHS2(hiveConf, miniClusterType, useMiniKdc, serverPrincipal, serverKeytab,
isMetastoreRemote, usePortsFromConf, authType, isHA, cleanupLocalDirOnStartup,
- isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab);
+ isMetastoreSecure, metastoreServerPrincipal, metastoreServerKeyTab, dataNodes);
}
}
@@ -212,9 +224,8 @@ public class MiniHS2 extends AbstractHiveService {
private MiniHS2(HiveConf hiveConf, MiniClusterType miniClusterType, boolean useMiniKdc,
String serverPrincipal, String serverKeytab, boolean isMetastoreRemote,
boolean usePortsFromConf, String authType, boolean isHA, boolean cleanupLocalDirOnStartup,
- boolean isMetastoreSecure,
- String metastoreServerPrincipal,
- String metastoreKeyTab) throws Exception {
+ boolean isMetastoreSecure, String metastoreServerPrincipal, String metastoreKeyTab,
+ int dataNodes) throws Exception {
// Always use localhost for hostname as some tests like SSL CN validation ones
// are tied to localhost being present in the certificate name
super(
@@ -242,7 +253,7 @@ public class MiniHS2 extends AbstractHiveService {
if (miniClusterType != MiniClusterType.LOCALFS_ONLY) {
// Initialize dfs
- dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, 4, true, null, isHA);
+ dfs = ShimLoader.getHadoopShims().getMiniDfs(hiveConf, dataNodes, true, null, isHA);
fs = dfs.getFileSystem();
String uriString = fs.getUri().toString();
@@ -334,7 +345,7 @@ public class MiniHS2 extends AbstractHiveService {
throws Exception {
this(hiveConf, clusterType, false, null, null,
false, usePortsFromConf, "KERBEROS", false, true,
- false, null, null);
+ false, null, null, DEFAULT_DATANODE_COUNT);
}
public void start(Map<String, String> confOverlay) throws Exception {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index b7babd6..ba0070d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -60,16 +60,12 @@ import com.google.common.util.concurrent.ListenableFuture;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
-import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
-import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -293,39 +289,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.stringtemplate.v4.ST;
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Serializable;
-import java.io.Writer;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
-import java.sql.SQLException;
-import java.util.AbstractList;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeMap;
-import java.util.TreeSet;
-import java.util.concurrent.ExecutionException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
import static org.apache.commons.lang.StringUtils.join;
-import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
/**
* DDLTask implementation.
@@ -2713,7 +2677,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
String tbl_location = " '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'";
// Table properties
- duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS));
+ duplicateProps.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS);
String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps);
createTab_stmt.add(TEMPORARY, tbl_temp);
@@ -3679,7 +3643,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
Map<String,String> tblProps = tbl.getParameters() == null ? new HashMap<String,String>() : tbl.getParameters();
Map<String, Long> valueMap = new HashMap<>();
Map<String, Boolean> stateMap = new HashMap<>();
- for (String stat : StatsSetupConst.supportedStats) {
+ for (String stat : StatsSetupConst.SUPPORTED_STATS) {
valueMap.put(stat, 0L);
stateMap.put(stat, true);
}
@@ -3688,7 +3652,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
for (Partition partition : parts) {
Map<String, String> props = partition.getParameters();
Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
- for (String stat : StatsSetupConst.supportedStats) {
+ for (String stat : StatsSetupConst.SUPPORTED_STATS) {
stateMap.put(stat, stateMap.get(stat) && state);
if (props != null && props.get(stat) != null) {
valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
@@ -3696,7 +3660,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
numParts++;
}
- for (String stat : StatsSetupConst.supportedStats) {
+ for (String stat : StatsSetupConst.SUPPORTED_STATS) {
StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
tblProps.put(stat, valueMap.get(stat).toString());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index 58c8960..5d382ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -593,7 +593,7 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I
return false;
}
boolean statsPresent = false;
- for (String stat : StatsSetupConst.supportedStats) {
+ for (String stat : StatsSetupConst.SUPPORTED_STATS) {
String statVal = props.get(stat);
if (statVal != null && Long.parseLong(statVal) > 0) {
statsPresent = true;
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index 7af6dab..36cd46a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.metadata.formatting;
import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -474,10 +475,16 @@ public final class MetaDataFormatUtils {
List<String> keys = new ArrayList<String>(params.keySet());
Collections.sort(keys);
for (String key : keys) {
+ String value = params.get(key);
+ if (key.equals(StatsSetupConst.NUM_ERASURE_CODED_FILES)) {
+ if ("0".equals(value)) {
+ continue;
+ }
+ }
tableInfo.append(FIELD_DELIM); // Ensures all params are indented.
formatOutput(key,
- escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key))
- : HiveStringUtils.escapeJava(params.get(key)),
+ escapeUnicode ? StringEscapeUtils.escapeJava(value)
+ : HiveStringUtils.escapeJava(value),
tableInfo, isOutputPadded);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
index 326cbed..705365b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/TextMetaDataFormatter.java
@@ -366,6 +366,7 @@ class TextMetaDataFormatter implements MetaDataFormatter {
public long lastAccessTime = 0;
public long lastUpdateTime = 0;
public int numOfFiles = 0;
+ int numOfErasureCodedFiles = 0;
}
// TODO: why is this in text formatter?!!
@@ -416,6 +417,12 @@ class TextMetaDataFormatter implements MetaDataFormatter {
outStream.write((unknown ? unknownString : "" + fd.numOfFiles).getBytes("UTF-8"));
outStream.write(terminator);
+ if (fd.numOfErasureCodedFiles > 0) {
+ outStream.write("totalNumberErasureCodedFiles:".getBytes("UTF-8"));
+ outStream.write((unknown ? unknownString : "" + fd.numOfErasureCodedFiles).getBytes("UTF-8"));
+ outStream.write(terminator);
+ }
+
for (int k = 0; k < indent; k++) {
outStream.write(Utilities.INDENT.getBytes("UTF-8"));
}
@@ -473,6 +480,9 @@ class TextMetaDataFormatter implements MetaDataFormatter {
continue;
}
fd.numOfFiles++;
+ if (currentStatus.isErasureCoded()) {
+ fd.numOfErasureCodedFiles++;
+ }
long fileLen = currentStatus.getLen();
fd.totalFileSize += fileLen;
if (fileLen > fd.maxFileSize) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 8e75db9..689c888 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -216,7 +216,7 @@ public class SparkMapJoinOptimizer implements NodeProcessor {
LOG.debug("Found a big table branch with parent operator {} and position {}", parentOp, pos);
bigTablePosition = pos;
bigTableFound = true;
- bigInputStat = new Statistics(0, Long.MAX_VALUE);
+ bigInputStat = new Statistics(0, Long.MAX_VALUE, 0);
} else {
// Either we've found multiple big table branches, or the current branch cannot
// be a big table branch. Disable mapjoin for these cases.
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index d0be33b..3c2b085 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1818,7 +1818,7 @@ public class StatsRulesProcFactory {
}
}
- Statistics wcStats = new Statistics(newNumRows, newDataSize);
+ Statistics wcStats = new Statistics(newNumRows, newDataSize, 0);
wcStats.setBasicStatsState(statsState);
// evaluate filter expression and update statistics
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 61458b4..821e428 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -45,10 +45,8 @@ import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hive.common.util.ReflectionUtil;
-import org.apache.hive.common.util.HiveStringUtils;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
-
/**
* PartitionDesc.
*
@@ -221,7 +219,7 @@ public class PartitionDesc implements Serializable, Cloneable {
@Explain(displayName = "properties", explainLevels = { Level.EXTENDED })
public Map getPropertiesExplain() {
- return HiveStringUtils.getPropertiesExplain(getProperties());
+ return PlanUtils.getPropertiesExplain(getProperties());
}
public void setProperties(final Properties properties) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 2c5b655..250a085 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -31,6 +32,7 @@ import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.LlapOutputFormat;
@@ -78,6 +80,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hive.common.util.HiveStringUtils.quoteComments;
/**
* PlanUtils.
@@ -1203,4 +1206,32 @@ public final class PlanUtils {
public static Class<? extends AbstractSerDe> getDefaultSerDe() {
return LazySimpleSerDe.class;
}
+
+ /**
+ * Get a Map of table or partition properties to be used in explain extended output.
+ * Do some filtering to make output readable and/or concise.
+ */
+ static Map getPropertiesExplain(Properties properties) {
+ if (properties != null) {
+ Map<Object, Object> clone = null;
+ String value = properties.getProperty("columns.comments");
+ if (value != null) {
+ // should copy properties first
+ clone = new HashMap<>(properties);
+ clone.put("columns.comments", quoteComments(value));
+ }
+ value = properties.getProperty(StatsSetupConst.NUM_ERASURE_CODED_FILES);
+ if ("0".equals(value)) {
+ // should copy properties first
+ if (clone == null) {
+ clone = new HashMap<>(properties);
+ }
+ clone.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES);
+ }
+ if (clone != null) {
+ return clone;
+ }
+ }
+ return properties;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index 6babe49..bc5f9d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -53,18 +53,20 @@ public class Statistics implements Serializable {
private long numRows;
private long runTimeNumRows;
private long dataSize;
+ private long numErasureCodedFiles;
private State basicStatsState;
private Map<String, ColStatistics> columnStats;
private State columnStatsState;
private boolean runtimeStats;
public Statistics() {
- this(0, 0);
+ this(0, 0, 0);
}
- public Statistics(long nr, long ds) {
+ public Statistics(long nr, long ds, long numEcFiles) {
numRows = nr;
dataSize = ds;
+ numErasureCodedFiles = numEcFiles;
runTimeNumRows = -1;
columnStats = null;
columnStatsState = State.NONE;
@@ -137,6 +139,10 @@ public class Statistics implements Serializable {
}
sb.append(" Data size: ");
sb.append(dataSize);
+ if (numErasureCodedFiles > 0) {
+ sb.append(" Erasure files: ");
+ sb.append(numErasureCodedFiles);
+ }
sb.append(" Basic stats: ");
sb.append(basicStatsState);
sb.append(" Column stats: ");
@@ -185,7 +191,7 @@ public class Statistics implements Serializable {
@Override
public Statistics clone() {
- Statistics clone = new Statistics(numRows, dataSize);
+ Statistics clone = new Statistics(numRows, dataSize, numErasureCodedFiles);
clone.setRunTimeNumRows(runTimeNumRows);
clone.setBasicStatsState(basicStatsState);
clone.setColumnStatsState(columnStatsState);
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
index 4068e56..bbce940 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableDesc.java
@@ -29,7 +29,6 @@ import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
-import org.apache.hive.common.util.HiveStringUtils;
import org.apache.hive.common.util.ReflectionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -127,7 +126,7 @@ public class TableDesc implements Serializable, Cloneable {
@Explain(displayName = "properties", explainLevels = { Level.EXTENDED })
public Map getPropertiesExplain() {
- return HiveStringUtils.getPropertiesExplain(getProperties());
+ return PlanUtils.getPropertiesExplain(getProperties());
}
public void setProperties(final Properties properties) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
index d4d46a3..3128ee8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsNoJobTask.java
@@ -163,6 +163,7 @@ public class BasicStatsNoJobTask implements IStatsProcessor {
long rawDataSize = 0;
long fileSize = 0;
long numFiles = 0;
+ long numErasureCodedFiles = 0;
// Note: this code would be invalid for transactional tables of any kind.
Utilities.FILE_OP_LOGGER.debug("Aggregating stats for {}", dir);
List<FileStatus> fileList = null;
@@ -190,6 +191,9 @@ public class BasicStatsNoJobTask implements IStatsProcessor {
numRows += statsRR.getStats().getRowCount();
fileSize += file.getLen();
numFiles += 1;
+ if (file.isErasureCoded()) {
+ numErasureCodedFiles++;
+ }
} else {
throw new HiveException(String.format("Unexpected file found during reading footers for: %s ", file));
}
@@ -206,6 +210,7 @@ public class BasicStatsNoJobTask implements IStatsProcessor {
parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
+ parameters.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, String.valueOf(numErasureCodedFiles));
if (partish.getPartition() != null) {
result = new Partition(partish.getTable(), partish.getPartition().getTPartition());
@@ -224,7 +229,7 @@ public class BasicStatsNoJobTask implements IStatsProcessor {
private String toString(Map<String, String> parameters) {
StringBuilder builder = new StringBuilder();
- for (String statType : StatsSetupConst.supportedStats) {
+ for (String statType : StatsSetupConst.SUPPORTED_STATS) {
String value = parameters.get(statType);
if (value != null) {
if (builder.length() > 0) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
index f31c170..0db90b0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
@@ -207,7 +207,7 @@ public class BasicStatsTask implements Serializable, IStatsProcessor {
private void updateStats(StatsAggregator statsAggregator, Map<String, String> parameters,
String aggKey) throws HiveException {
- for (String statType : StatsSetupConst.statsRequireCompute) {
+ for (String statType : StatsSetupConst.STATS_REQUIRE_COMPUTE) {
String value = statsAggregator.aggregateStats(aggKey, statType);
if (value != null && !value.isEmpty()) {
long longValue = Long.parseLong(value);
@@ -411,7 +411,7 @@ public class BasicStatsTask implements Serializable, IStatsProcessor {
private String toString(Map<String, String> parameters) {
StringBuilder builder = new StringBuilder();
- for (String statType : StatsSetupConst.supportedStats) {
+ for (String statType : StatsSetupConst.SUPPORTED_STATS) {
String value = parameters.get(statType);
if (value != null) {
if (builder.length() > 0) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 494939a..95a4440 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -261,6 +261,8 @@ public class StatsUtils {
long nr = basicStats.getNumRows();
List<ColStatistics> colStats = Lists.newArrayList();
+ long numErasureCodedFiles = getErasureCodedFiles(table);
+
if (fetchColStats) {
colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache);
if(colStats == null) {
@@ -273,7 +275,7 @@ public class StatsUtils {
long betterDS = getDataSizeFromColumnStats(nr, colStats);
ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS;
}
- stats = new Statistics(nr, ds);
+ stats = new Statistics(nr, ds, numErasureCodedFiles);
// infer if any column can be primary key based on column statistics
inferAndSetPrimaryKey(stats.getNumRows(), colStats);
@@ -308,10 +310,14 @@ public class StatsUtils {
long nr = bbs.getNumRows();
long ds = bbs.getDataSize();
+ List<Long> erasureCodedFiles = getBasicStatForPartitions(table, partList.getNotDeniedPartns(),
+ StatsSetupConst.NUM_ERASURE_CODED_FILES);
+ long numErasureCodedFiles = getSumIgnoreNegatives(erasureCodedFiles);
+
if (nr == 0) {
nr=1;
}
- stats = new Statistics(nr, ds);
+ stats = new Statistics(nr, ds, numErasureCodedFiles);
stats.setBasicStatsState(bbs.getState());
if (nr > 0) {
// FIXME: this promotion process should be removed later
@@ -1656,6 +1662,14 @@ public class StatsUtils {
}
/**
+ * Get number of Erasure Coded files for a table
+ * @return count of EC files
+ */
+ public static long getErasureCodedFiles(Table table) {
+ return getBasicStatForTable(table, StatsSetupConst.NUM_ERASURE_CODED_FILES);
+ }
+
+ /**
* Get basic stats of table
* @param table
* - table
@@ -1782,7 +1796,7 @@ public class StatsUtils {
}
/**
- * Get qualified column name from output key column names
+ * Get qualified column name from output key column names.
* @param keyExprs
* - output key names
* @return list of qualified names
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index 6044719..611f85a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -21,8 +21,6 @@ import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.HashSet;
-import com.google.common.collect.Lists;
-
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
@@ -469,7 +467,7 @@ public class CompactorMR {
HiveStringUtils.escapeHiveCommand(location)).append("' TBLPROPERTIES (");
// Exclude all standard table properties.
Set<String> excludes = getHiveMetastoreConstants();
- excludes.addAll(Lists.newArrayList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS));
+ excludes.addAll(StatsSetupConst.TABLE_PARAMS_STATS_KEYS);
isFirst = true;
for (Map.Entry<String, String> e : t.getParameters().entrySet()) {
if (e.getValue() == null) continue;
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/queries/clientpositive/erasure_explain.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/erasure_explain.q b/ql/src/test/queries/clientpositive/erasure_explain.q
new file mode 100644
index 0000000..e2954d4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/erasure_explain.q
@@ -0,0 +1,24 @@
+--! qt:dataset:src
+--! qt:dataset:srcpart
+-- Test explain diagnostics with Erasure Coding
+
+ERASURE echo listPolicies originally was;
+ERASURE listPolicies;
+
+show table extended like srcpart;
+
+desc formatted srcpart;
+
+explain select key, value from srcpart;
+
+explain extended select key, value from srcpart;
+
+show table extended like src;
+
+desc formatted src;
+
+explain select key, value from src;
+
+explain extended select key, value from src;
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/queries/clientpositive/erasure_simple.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/erasure_simple.q b/ql/src/test/queries/clientpositive/erasure_simple.q
index c08409c..cc886c2 100644
--- a/ql/src/test/queries/clientpositive/erasure_simple.q
+++ b/ql/src/test/queries/clientpositive/erasure_simple.q
@@ -5,6 +5,7 @@
ERASURE echo listPolicies originally was;
ERASURE listPolicies;
ERASURE enablePolicy --policy RS-10-4-1024k;
+ERASURE enablePolicy --policy XOR-2-1-1024k;
ERASURE echo listPolicies after enablePolicy;
ERASURE listPolicies;
@@ -25,8 +26,20 @@ ERASURE getPolicy --path hdfs:///tmp/erasure_coding1;
create table erasure_table (a int) location 'hdfs:///tmp/erasure_coding1/location1';
+-- insert some data with the default policy (RS-3-2-1024k) from the fs root
insert into erasure_table values(4);
+
+-- set a new policy on the directory and insert some data
+ERASURE setPolicy --path hdfs:///tmp/erasure_coding1 --policy XOR-2-1-1024k;
+insert into erasure_table values(5);
+
+ERASURE echo policy on older file is;
+ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0;
+ERASURE echo policy on newer file is;
+ERASURE getPolicy --path hdfs:///tmp/erasure_coding1/location1/000000_0_copy_1;
+
+-- show that data is present
select * from erasure_table;
drop table if exists erasure_table2;
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientnegative/unset_table_property.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/unset_table_property.q.out b/ql/src/test/results/clientnegative/unset_table_property.q.out
index eb308eb..20378a1 100644
--- a/ql/src/test/results/clientnegative/unset_table_property.q.out
+++ b/ql/src/test/results/clientnegative/unset_table_property.q.out
@@ -24,6 +24,7 @@ bucketing_version 2
c 3
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out b/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out
index ff02643..4db5d70 100644
--- a/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out
+++ b/ql/src/test/results/clientpositive/beeline/materialized_view_create_rewrite.q.out
@@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 408
totalSize 457
@@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 232
totalSize 326
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
index 2e44e14..383f2dc 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
@@ -89,6 +89,7 @@ bucketing_version 2
druid.datasource default.cmv_mat_view_n2
druid.segment.granularity HOUR
numFiles 0
+numFilesErasureCoded 0
numRows 2
rawDataSize 0
storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler
@@ -136,6 +137,7 @@ bucketing_version 2
druid.datasource default.cmv_mat_view2_n0
druid.segment.granularity HOUR
numFiles 0
+numFilesErasureCoded 0
numRows 3
rawDataSize 0
storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler
@@ -515,6 +517,7 @@ druid.datasource default.cmv_mat_view2_n0
druid.segment.granularity HOUR
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 3
rawDataSize 0
storage_handler org.apache.hadoop.hive.druid.DruidStorageHandler
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out b/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out
new file mode 100644
index 0000000..8ada9b6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/erasurecoding/erasure_explain.q.out
@@ -0,0 +1,409 @@
+ECHO listPolicies originally was
+Policy: RS-10-4-1024k DISABLED
+Policy: RS-3-2-1024k ENABLED
+Policy: RS-6-3-1024k ENABLED
+Policy: RS-LEGACY-6-3-1024k DISABLED
+Policy: XOR-2-1-1024k DISABLED
+PREHOOK: query: show table extended like srcpart
+PREHOOK: type: SHOW_TABLESTATUS
+POSTHOOK: query: show table extended like srcpart
+POSTHOOK: type: SHOW_TABLESTATUS
+tableName:srcpart
+#### A masked pattern was here ####
+location:hdfs://### HDFS PATH ###
+inputformat:org.apache.hadoop.mapred.TextInputFormat
+outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+columns:struct columns { string key, string value}
+partitioned:true
+partitionColumns:struct partition_columns { string ds, string hr}
+totalNumberFiles:4
+totalNumberErasureCodedFiles:4
+totalFileSize:23248
+maxFileSize:5812
+minFileSize:5812
+#### A masked pattern was here ####
+
+PREHOOK: query: desc formatted srcpart
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@srcpart
+POSTHOOK: query: desc formatted srcpart
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@srcpart
+# col_name data_type comment
+key string default
+value string default
+
+# Partition Information
+# col_name data_type comment
+ds string
+hr string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ bucketing_version 2
+ numFiles 4
+ numFilesErasureCoded 4
+ numPartitions 4
+ numRows 2000
+ rawDataSize 21248
+ totalSize 23248
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: explain select key, value from srcpart
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from srcpart
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: explain extended select key, value from srcpart
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value from srcpart
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Partition Description:
+ Partition
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ numFiles 1
+ numFilesErasureCoded 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Partition
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-08
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ numFiles 1
+ numFilesErasureCoded 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Partition
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 11
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ numFiles 1
+ numFilesErasureCoded 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Partition
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2008-04-09
+ hr 12
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+ bucket_count -1
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ numFiles 1
+ numFilesErasureCoded 1
+ numRows 500
+ partition_columns ds/hr
+ partition_columns.types string:string
+ rawDataSize 5312
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns key,value
+ columns.comments 'default','default'
+ columns.types string:string
+#### A masked pattern was here ####
+ location hdfs://### HDFS PATH ###
+ name default.srcpart
+ partition_columns ds/hr
+ partition_columns.types string:string
+ serialization.ddl struct srcpart { string key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcpart
+ name: default.srcpart
+ Processor Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2000 Data size: 21248 Erasure files: 4 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: show table extended like src
+PREHOOK: type: SHOW_TABLESTATUS
+POSTHOOK: query: show table extended like src
+POSTHOOK: type: SHOW_TABLESTATUS
+tableName:src
+#### A masked pattern was here ####
+location:hdfs://### HDFS PATH ###
+inputformat:org.apache.hadoop.mapred.TextInputFormat
+outputformat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+columns:struct columns { string key, string value}
+partitioned:false
+partitionColumns:
+totalNumberFiles:1
+totalNumberErasureCodedFiles:1
+totalFileSize:5812
+maxFileSize:5812
+minFileSize:5812
+#### A masked pattern was here ####
+
+PREHOOK: query: desc formatted src
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@src
+POSTHOOK: query: desc formatted src
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@src
+# col_name data_type comment
+key string default
+value string default
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+ bucketing_version 2
+ numFiles 1
+ numFilesErasureCoded 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: explain select key, value from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
+PREHOOK: query: explain extended select key, value from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Erasure files: 1 Basic stats: COMPLETE Column stats: NONE
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out b/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out
index 01f6015..b44cb7d 100644
--- a/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out
+++ b/ql/src/test/results/clientpositive/erasurecoding/erasure_simple.q.out
@@ -5,12 +5,13 @@ Policy: RS-6-3-1024k ENABLED
Policy: RS-LEGACY-6-3-1024k DISABLED
Policy: XOR-2-1-1024k DISABLED
Enabled EC policy 'RS-10-4-1024k'
+Enabled EC policy 'XOR-2-1-1024k'
ECHO listPolicies after enablePolicy
Policy: RS-10-4-1024k ENABLED
Policy: RS-3-2-1024k ENABLED
Policy: RS-6-3-1024k ENABLED
Policy: RS-LEGACY-6-3-1024k DISABLED
-Policy: XOR-2-1-1024k DISABLED
+Policy: XOR-2-1-1024k ENABLED
ECHO original policy on erasure_coding1
EC policy is 'RS-3-2-1024k'
ECHO set the default policy on erasure_coding1
@@ -39,6 +40,20 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@erasure_table
POSTHOOK: Lineage: erasure_table.a SCRIPT []
+Set EC policy' XOR-2-1-1024k
+PREHOOK: query: insert into erasure_table values(5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@erasure_table
+POSTHOOK: query: insert into erasure_table values(5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@erasure_table
+POSTHOOK: Lineage: erasure_table.a SCRIPT []
+ECHO policy on older file is
+EC policy is 'RS-3-2-1024k'
+ECHO policy on newer file is
+EC policy is 'XOR-2-1-1024k'
PREHOOK: query: select * from erasure_table
PREHOOK: type: QUERY
PREHOOK: Input: default@erasure_table
@@ -48,6 +63,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@erasure_table
POSTHOOK: Output: hdfs://### HDFS PATH ###
4
+5
PREHOOK: query: drop table if exists erasure_table2
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table if exists erasure_table2
@@ -88,6 +104,7 @@ columns:struct columns { string key, string value}
partitioned:false
partitionColumns:
totalNumberFiles:1
+totalNumberErasureCodedFiles:1
totalFileSize:5812
maxFileSize:5812
minFileSize:5812
@@ -100,6 +117,7 @@ POSTHOOK: query: SHOW TBLPROPERTIES erasure_table2
POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
numFiles 1
+numFilesErasureCoded 1
numRows 500
rawDataSize 5312
totalSize 5812
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out
index 9a70096..95f8966 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create.q.out
@@ -245,6 +245,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
key value
numFiles 1
+numFilesErasureCoded 0
numRows 5
rawDataSize 1605
totalSize 703
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
index c3cd893..71adebb 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite.q.out
@@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 408
totalSize 457
@@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 232
totalSize 326
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
index e2972f3..ce1c281 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_dummy.q.out
@@ -67,6 +67,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 408
totalSize 457
@@ -100,6 +101,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 232
totalSize 326
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
index e5fb23d..98f7437 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_multi_db.q.out
@@ -91,6 +91,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 408
totalSize 457
@@ -124,6 +125,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
bucketing_version 2
numFiles 1
+numFilesErasureCoded 0
numRows 2
rawDataSize 232
totalSize 326
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
index 85092a0..c68c127 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_describe.q.out
@@ -98,6 +98,7 @@ bucketing_version 2
comment this is the first view
key foo
numFiles 1
+numFilesErasureCoded 0
numRows 5
rawDataSize 580
totalSize 345
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/show_tblproperties.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_tblproperties.q.out b/ql/src/test/results/clientpositive/show_tblproperties.q.out
index e4bda1d..83e1ebd 100644
--- a/ql/src/test/results/clientpositive/show_tblproperties.q.out
+++ b/ql/src/test/results/clientpositive/show_tblproperties.q.out
@@ -41,6 +41,7 @@ bar bar value
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
tmp true
@@ -60,6 +61,7 @@ bar bar value
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
tmp true
@@ -115,6 +117,7 @@ bar bar value
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
tmp true
@@ -134,6 +137,7 @@ bar bar value1
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
tmp true1
@@ -159,6 +163,7 @@ bar bar value1
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
tmp true1
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/ql/src/test/results/clientpositive/unset_table_view_property.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/unset_table_view_property.q.out b/ql/src/test/results/clientpositive/unset_table_view_property.q.out
index 5887971..5d140d6 100644
--- a/ql/src/test/results/clientpositive/unset_table_view_property.q.out
+++ b/ql/src/test/results/clientpositive/unset_table_view_property.q.out
@@ -19,6 +19,7 @@ POSTHOOK: type: SHOW_TBLPROPERTIES
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true"}}
bucketing_version 2
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -41,6 +42,7 @@ bucketing_version 2
c 3
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -61,6 +63,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2"
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -84,6 +87,7 @@ c 3
d 4
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -105,6 +109,7 @@ bucketing_version 2
c 3
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -125,6 +130,7 @@ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2"
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -149,6 +155,7 @@ c 3
d 4
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -171,6 +178,7 @@ bucketing_version 2
c 3
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
@@ -192,6 +200,7 @@ a 1
bucketing_version 2
#### A masked pattern was here ####
numFiles 0
+numFilesErasureCoded 0
numRows 0
rawDataSize 0
totalSize 0
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 78ea01d..a7ca05a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -22,6 +22,7 @@ import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
@@ -105,6 +106,11 @@ public class StatsSetupConst {
public static final String RAW_DATA_SIZE = "rawDataSize";
/**
+ * The name of the statistic for Number of Erasure Coded Files - to be published or gathered.
+ */
+ public static final String NUM_ERASURE_CODED_FILES = "numFilesErasureCoded";
+
+ /**
* Temp dir for writing stats from tasks.
*/
public static final String STATS_TMP_LOC = "hive.stats.tmp.loc";
@@ -113,18 +119,20 @@ public class StatsSetupConst {
/**
* List of all supported statistics
*/
- public static final String[] supportedStats = {NUM_FILES,ROW_COUNT,TOTAL_SIZE,RAW_DATA_SIZE};
+ public static final List<String> SUPPORTED_STATS = ImmutableList.of(
+ NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, NUM_ERASURE_CODED_FILES);
/**
* List of all statistics that need to be collected during query execution. These are
* statistics that inherently require a scan of the data.
*/
- public static final String[] statsRequireCompute = new String[] {ROW_COUNT,RAW_DATA_SIZE};
+ public static final List<String> STATS_REQUIRE_COMPUTE = ImmutableList.of(ROW_COUNT, RAW_DATA_SIZE);
/**
* List of statistics that can be collected quickly without requiring a scan of the data.
*/
- public static final String[] fastStats = new String[] {NUM_FILES,TOTAL_SIZE};
+ public static final List<String> FAST_STATS = ImmutableList.of(
+ NUM_FILES, TOTAL_SIZE, NUM_ERASURE_CODED_FILES);
// This string constant is used to indicate to AlterHandler that
// alterPartition/alterTable is happening via statsTask or via user.
@@ -154,8 +162,9 @@ public class StatsSetupConst {
public static final String FALSE = "false";
// The parameter keys for the table statistics. Those keys are excluded from 'show create table' command output.
- public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
- COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
+ public static final List<String> TABLE_PARAMS_STATS_KEYS = ImmutableList.of(
+ COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS,
+ NUM_ERASURE_CODED_FILES);
private static class ColumnStatsAccurate {
private static ObjectReader objectReader;
@@ -299,7 +308,7 @@ public class StatsSetupConst {
public static void setStatsStateForCreateTable(Map<String, String> params,
List<String> cols, String setting) {
if (TRUE.equals(setting)) {
- for (String stat : StatsSetupConst.supportedStats) {
+ for (String stat : StatsSetupConst.SUPPORTED_STATS) {
params.put(stat, "0");
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index c6c04b7..31bf615 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.metastore;
import static org.apache.commons.lang.StringUtils.join;
-import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION;
import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_COMMENT;
import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME;
import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
@@ -2674,7 +2673,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
if (null == props) {
return;
}
- for (String stat : StatsSetupConst.supportedStats) {
+ for (String stat : StatsSetupConst.SUPPORTED_STATS) {
String statVal = props.get(stat);
if (statVal != null) {
//In the case of truncate table, we set the stats to be 0.
http://git-wip-us.apache.org/repos/asf/hive/blob/c2940a07/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index cbe89b6..73924ee 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -95,7 +95,6 @@ import java.util.Properties;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.SortedSet;
-import java.util.StringJoiner;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.TreeSet;
@@ -628,7 +627,7 @@ public class MetaStoreUtils {
* @return True if the passed Parameters Map contains values for all "Fast Stats".
*/
private static boolean containsAllFastStats(Map<String, String> partParams) {
- for (String stat : StatsSetupConst.fastStats) {
+ for (String stat : StatsSetupConst.FAST_STATS) {
if (!partParams.containsKey(stat)) {
return false;
}
@@ -639,7 +638,7 @@ public class MetaStoreUtils {
public static boolean isFastStatsSame(Partition oldPart, Partition newPart) {
// requires to calculate stats if new and old have different fast stats
if ((oldPart != null) && (oldPart.getParameters() != null)) {
- for (String stat : StatsSetupConst.fastStats) {
+ for (String stat : StatsSetupConst.FAST_STATS) {
if (oldPart.getParameters().containsKey(stat)) {
Long oldStat = Long.parseLong(oldPart.getParameters().get(stat));
Long newStat = Long.parseLong(newPart.getParameters().get(stat));
@@ -720,20 +719,26 @@ public class MetaStoreUtils {
LOG.trace("Populating quick stats based on {} files", fileStatus.size());
int numFiles = 0;
long tableSize = 0L;
+ int numErasureCodedFiles = 0;
for (FileStatus status : fileStatus) {
// don't take directories into account for quick stats TODO: wtf?
if (!status.isDir()) {
tableSize += status.getLen();
numFiles += 1;
+ if (status.isErasureCoded()) {
+ numErasureCodedFiles++;
+ }
}
}
params.put(StatsSetupConst.NUM_FILES, Integer.toString(numFiles));
params.put(StatsSetupConst.TOTAL_SIZE, Long.toString(tableSize));
+ params.put(StatsSetupConst.NUM_ERASURE_CODED_FILES, Integer.toString(numErasureCodedFiles));
}
public static void clearQuickStats(Map<String, String> params) {
params.remove(StatsSetupConst.NUM_FILES);
params.remove(StatsSetupConst.TOTAL_SIZE);
+ params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES);
}