You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/04/20 00:25:34 UTC
[42/58] [abbrv] hive git commit: HIVE-13496. Create initial test data
once across multiple test runs - TestCliDriver. (Siddharth Seth,
reviewed by Ashutosh Chauhan)
HIVE-13496. Create initial test data once across multiple test runs - TestCliDriver. (Siddharth Seth, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/976e628f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/976e628f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/976e628f
Branch: refs/heads/llap
Commit: 976e628fc01911936caa19e61ea3342f3a19455a
Parents: 0dd4621
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Apr 14 10:25:53 2016 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Thu Apr 14 10:25:53 2016 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/QTestUtil.java | 247 +++++++++++++++++--
pom.xml | 2 +
ql/src/test/templates/TestCliDriver.vm | 38 ++-
3 files changed, 265 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/976e628f/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 2f109ab..79646cd 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -51,7 +51,6 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -63,6 +62,7 @@ import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.google.common.base.Preconditions;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
@@ -84,9 +84,7 @@ import org.apache.hadoop.hive.common.io.SortPrintStream;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.LlapItUtils;
-import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration;
import org.apache.hadoop.hive.llap.daemon.MiniLlapCluster;
-import org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon;
import org.apache.hadoop.hive.llap.io.api.LlapProxy;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Index;
@@ -99,6 +97,7 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionState;
import org.apache.hadoop.hive.ql.lockmgr.zookeeper.CuratorFrameworkSingleton;
import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
@@ -139,6 +138,8 @@ public class QTestUtil {
// security property names
private static final String SECURITY_KEY_PROVIDER_URI_NAME = "dfs.encryption.key.provider.uri";
private static final String CRLF = System.getProperty("line.separator");
+ private static final String TEST_BUILD_DIR = System.getProperty("test.build.dir");
+ private static final String CACHED_DATA_DIR_NAME = "cachedData";
private static final Logger LOG = LoggerFactory.getLogger("QTestUtil");
private static final String QTEST_LEAVE_FILES = "QTEST_LEAVE_FILES";
@@ -183,6 +184,16 @@ public class QTestUtil {
private final String cleanupScript;
private boolean useHBaseMetastore = false;
+ // Parameters which help tracking cached data generation.
+ private final String driverName;
+ private Path cachedDataPath;
+ private String metaStorePathString;
+ private Path metaStorePath;
+ private FileSystem localFs;
+ private boolean attemptingCacheUsage;
+
+ private boolean dbEtcSetup = false;
+
public interface SuiteAddTestFunctor {
public void addTestToSuite(TestSuite suite, Object setup, String tName);
}
@@ -378,11 +389,34 @@ public class QTestUtil {
}
public QTestUtil(String outDir, String logDir, MiniClusterType clusterType,
+ String confDir, String hadoopVer, String initScript, String cleanupScript,
+ boolean useHBaseMetastore, boolean withLlapIo) throws Exception {
+ // For now, to avoid changing multiple test templates, a null driver name disables
+ // the cached test-data optimization (attemptingCacheUsage is set to false).
+ this(outDir, logDir, clusterType, confDir, hadoopVer, initScript, cleanupScript,
+ useHBaseMetastore, withLlapIo, null);
+ }
+
+ public QTestUtil(String outDir, String logDir, MiniClusterType clusterType,
String confDir, String hadoopVer, String initScript, String cleanupScript,
- boolean useHBaseMetastore, boolean withLlapIo)
+ boolean useHBaseMetastore, boolean withLlapIo, String driverName)
throws Exception {
+ this.attemptingCacheUsage = (StringUtils.isEmpty(TEST_BUILD_DIR) ||
+ StringUtils.isEmpty(driverName) || useHBaseMetastore) ? false : true;
+ this.driverName = driverName;
this.outDir = outDir;
this.logDir = logDir;
+ LOG.info("Creating QTestUtil with settings: "
+ + "driverName=" + driverName
+ + ", attemptingCacheUsage=" + attemptingCacheUsage
+ + ", test.build.dir=" + System.getProperty("test.build.dir")
+ + ", useHbaseMetaStore=" + useHBaseMetastore
+ + ", withLlapIo=" + withLlapIo
+ + ", confDir=" + confDir
+ + ", outDir=" + outDir
+ + ", logDir=" + logDir
+ + ", initScript=" + initScript
+ + ", cleanupScript=" + cleanupScript);
this.useHBaseMetastore = useHBaseMetastore;
if (confDir != null && !confDir.isEmpty()) {
@@ -471,6 +505,7 @@ public class QTestUtil {
if (scriptsDir == null) {
scriptsDir = new File(".").getAbsolutePath() + "/data/scripts";
}
+ LOG.info("Using DataDir=" + dataDir + ", ScriptsDir=" + scriptsDir);
this.initScript = scriptsDir + File.separator + initScript;
this.cleanupScript = scriptsDir + File.separator + cleanupScript;
@@ -832,6 +867,17 @@ public class QTestUtil {
return;
}
+ if (!attemptingCacheUsage) {
+ cleanupNonCacheUsage();
+ } else {
+ cleanupCacheUsage();
+ }
+
+ FunctionRegistry.unregisterTemporaryUDF("test_udaf");
+ FunctionRegistry.unregisterTemporaryUDF("test_error");
+ }
+
+ private void cleanupNonCacheUsage() throws Exception {
clearTablesCreatedDuringTests();
clearKeysCreatedInTests();
@@ -849,21 +895,42 @@ public class QTestUtil {
LOG.info("No cleanup script detected. Skipping.");
}
+ cleanupWarehouseDir();
+ }
+
+ private void cleanupCacheUsage() throws IOException { // cleanup variant taken when attemptingCacheUsage is true
+ // Recursively delete the metastore directory and delete everything inside the warehouse directory.
+ // The current Hive db is closed first, since files are going to come in to replace it soon.
+ Preconditions.checkState(attemptingCacheUsage); // fail fast if reached on the non-cached path
+ Preconditions.checkNotNull(metaStorePath); // left non-null by init() only while caching stays enabled
+ Preconditions.checkNotNull(localFs); // assigned in init() on the same branch
+ Hive.closeCurrent();
+ cleanupMetastoreDir();
+ cleanupWarehouseDir();
+ }
+
+ private void cleanupWarehouseDir() throws IOException {
// delete any contents in the warehouse dir
Path p = new Path(testWarehouse);
FileSystem fs = p.getFileSystem(conf);
try {
- FileStatus [] ls = fs.listStatus(p);
- for (int i=0; (ls != null) && (i<ls.length); i++) {
+ FileStatus[] ls = fs.listStatus(p);
+ for (int i = 0; (ls != null) && (i < ls.length); i++) {
fs.delete(ls[i].getPath(), true);
}
} catch (FileNotFoundException e) {
// Best effort
}
+ }
- FunctionRegistry.unregisterTemporaryUDF("test_udaf");
- FunctionRegistry.unregisterTemporaryUDF("test_error");
+ private void cleanupMetastoreDir() throws IOException { // deletes the directory at metaStorePath via localFs
+ try {
+ LOG.info("Cleaning up metastore Dir: {}", metaStorePath);
+ localFs.delete(metaStorePath, true); // second argument true = recursive delete
+ } catch (FileNotFoundException e) {
+ // Best effort: a metastore directory that is already missing is not treated as an error.
+ }
+ }
protected void runCreateTableCmd(String createTableCmd) throws Exception {
@@ -893,6 +960,10 @@ public class QTestUtil {
}
public void createSources(String tname) throws Exception {
+ createSources(tname, false);
+ }
+
+ public void createSources(String tname, boolean forceCreate) throws Exception {
boolean canReuseSession = (tname == null) || !qNoSessionReuseQuerySet.contains(tname);
if(!isSessionStateStarted) {
startSessionState(canReuseSession);
@@ -901,34 +972,173 @@ public class QTestUtil {
if(cliDriver == null) {
cliDriver = new CliDriver();
}
- cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+
File scriptFile = new File(this.initScript);
if (!scriptFile.isFile()) {
LOG.info("No init script detected. Skipping");
+ if (attemptingCacheUsage) {
+ setupDbsEtc(true, true);
+ }
return;
}
- conf.setBoolean("hive.test.init.phase", true);
+ if (!attemptingCacheUsage || forceCreate) {
+ LOG.info("Creating sources without data caching. attemptingCacheUsage={}, forceCreate={}",
+ attemptingCacheUsage, forceCreate);
+ cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+ conf.setBoolean("hive.test.init.phase", true);
+ createSourcesNonCached(scriptFile);
+ } else {
+ LOG.info("Creating sources with data caching");
+ createSourcesCached(scriptFile);
+ }
+
+ conf.setBoolean("hive.test.init.phase", false);
+ }
+
+ private void createSourcesNonCached(File scriptFile) throws IOException {
String initCommands = readEntireFileIntoString(scriptFile);
LOG.info("Initial setup (" + initScript + "):\n" + initCommands);
cliDriver.processLine(initCommands);
+ }
- conf.setBoolean("hive.test.init.phase", false);
+ private void createSourcesCached(File scriptFile) throws IOException, HiveException { // init-script data via on-disk cache
+
+ // Cache layout: cachedDataPath/warehouse and cachedDataPath/metastore. If both exist, just copy them into place.
+ Path cachedWarehousePath = new Path(cachedDataPath, "warehouse");
+ Path cachedMetaStorePtah = new Path(cachedDataPath, "metastore");
+ if (localFs.exists(cachedDataPath)) {
+ if (localFs.exists(cachedWarehousePath) && localFs.exists(cachedMetaStorePtah)) {
+ LOG.info("Cached data found in {}. Attempting to use it", cachedDataPath);
+ // Data is already cached.
+ // Copy the cached warehouse and metastore over to the locations this test run uses.
+ Path warehousePath = new Path(testWarehouse);
+ FileSystem warehouseFs = warehousePath.getFileSystem(conf);
+ try {
+ warehouseFs.delete(warehousePath, false); // NOTE(review): non-recursive; presumably expects an already-emptied dir - confirm
+ } catch (FileNotFoundException e) {
+ // A missing warehouse directory is fine; the copy below recreates it.
+ }
+ warehouseFs.copyFromLocalFile(false, cachedWarehousePath, warehousePath); // false = keep the cached copy
+
+ try {
+ localFs.delete(metaStorePath, false); // NOTE(review): non-recursive here as well - confirm
+ } catch (IOException e) {
+ // NOTE(review): intended for a missing path, but this catch swallows every IOException from delete.
+ }
+ localFs.copyFromLocalFile(false, cachedMetaStorePtah, metaStorePath); // false = keep the cached copy
+ setupDbsEtc(true, false); // force re-setup; isNewDb=false since the metastore files were copied in
+ cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+ conf.setBoolean("hive.test.init.phase", true);
+
+ return; // cache hit: the init script is not run
+ } else {
+ // Only part of the cache is present. Remove it so that it is regenerated and cached below.
+ LOG.info("Partial or no cached data found at {}. Cache will be created", cachedDataPath);
+ localFs.delete(cachedDataPath, true);
+ }
+ } else {
+ LOG.info("No cached data found at {}. Cache will be created", cachedDataPath);
+ // No cache directory yet. Fall through to generate the data and cache it.
+ }
+
+ // Cache miss: run the init script against a fresh db, then copy the results into the cache.
+ setupDbsEtc(true, true);
+ cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+ conf.setBoolean("hive.test.init.phase", true);
+ createSourcesNonCached(scriptFile);
+
+ // Close the DB so that contents can be copied out safely.
+ Hive.closeCurrent();
+
+ // Populate the cache directory from the warehouse and metastore that were just generated.
+ localFs.mkdirs(cachedDataPath);
+
+ Path warehousePath = new Path(testWarehouse);
+ FileSystem warehouseFs = warehousePath.getFileSystem(conf);
+
+ warehouseFs.copyToLocalFile(false, warehousePath, cachedWarehousePath, true); // first false = keep the source
+ localFs.copyToLocalFile(false, metaStorePath, cachedMetaStorePtah, true);
+
+ // Re-open the DB etc. (isNewDb=false), since it was closed for the copy above.
+ setupDbsEtc(true, false);
+ }
- public void init() throws Exception {
+ private static final Pattern metaStoreUriPattern = // compiled once; used by getDerbyDbPath
+ Pattern.compile("derby.*?databaseName=(.*?)(;|$)"); // group 1: text after databaseName= up to the next semicolon or end of input
+ private String getDerbyDbPath(String jdbcConnectString) { // returns the databaseName value of a derby connect string, or null
+ if (StringUtils.isEmpty(jdbcConnectString)) {
+ return null; // nothing to parse
+ }
+ Matcher matcher = metaStoreUriPattern.matcher(jdbcConnectString);
+ if (matcher.find()) { // find(), not matches(): the pattern may start anywhere in the string
+ return matcher.group(1);
+ } else {
+ return null; // no derby databaseName found in the string
+ }
+ }
+
+ public void init() throws Exception {
+ LOG.info("init");
testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
+ LOG.info("TestWarehouseDir set to: [{}]", testWarehouse);
+ if (attemptingCacheUsage) {
+ // The derby path comes from METASTORECONNECTURLKEY. Default ends up being target/junit_metastore_db
+ String metaStoreConnectUrl = conf.getVar(ConfVars.METASTORECONNECTURLKEY);
+ LOG.info("MetastoreConnectUrl: " + metaStoreConnectUrl);
+ metaStorePathString = getDerbyDbPath(metaStoreConnectUrl);
+
+ if (metaStorePathString == null) {
+ LOG.warn(
+ "Disabling attempted cache usage since metastore path cannot be determined from {}",
+ metaStoreConnectUrl);
+ attemptingCacheUsage = false;
+ } else {
+ LOG.info("Metastore url path: " + metaStorePathString);
+ metaStorePath = new Path(metaStorePathString);
+ if (metaStorePath.isAbsolute() && metaStorePathString.split(File.separator).length >= 3) {
+ // Turn this on only if the path is absolute, and is at least 3 deep - since we'll be deleting files later.
+ localFs = FileSystem.getLocal(conf).getRaw();
+ assert(TEST_BUILD_DIR != null);
+ cachedDataPath = new Path(TEST_BUILD_DIR, CACHED_DATA_DIR_NAME);
+ cachedDataPath = new Path(cachedDataPath, driverName);
+ LOG.info("Using cachedDataPath: " + cachedDataPath);
+ } else {
+ LOG.warn(
+ "Disableing attempted cache usage since metastore path may not be absolute, or depth is < 3. MetaStorePath={}",
+ metaStorePathString);
+ metaStorePath = null;
+ attemptingCacheUsage = false;
+ }
+
+ }
+ }
String execEngine = conf.get("hive.execution.engine");
conf.set("hive.execution.engine", "mr");
SessionState.start(conf);
conf.set("hive.execution.engine", execEngine);
- db = Hive.get(conf);
- drv = new Driver(conf);
- drv.init();
- pd = new ParseDriver();
- sem = new SemanticAnalyzer(conf);
+
+ if (!attemptingCacheUsage) {
+ setupDbsEtc(true, true);
+ }
+ }
+
+ private void setupDbsEtc(boolean force, boolean isNewDb) throws HiveException { // (re)creates db, drv, pd and sem
+ if (!dbEtcSetup || force) { // no-op once set up, unless the caller forces a rebuild
+ if (isNewDb) {
+ db = Hive.get(conf);
+ } else {
+ db = Hive.getWithFastCheck(conf, false); // NOTE(review): semantics of the false argument are not visible here - confirm
+ }
+ LOG.info("Obtained db");
+ drv = new Driver(conf);
+ drv.init();
+ pd = new ParseDriver();
+ sem = new SemanticAnalyzer(conf);
+ dbEtcSetup = true; // later non-forced calls return without doing anything
+ }
+ }
public void init(String tname) throws Exception {
@@ -944,8 +1154,9 @@ public class QTestUtil {
public String cliInit(String tname, boolean recreate) throws Exception {
if (recreate) {
cleanUp(tname);
- createSources(tname);
+ createSources(tname, true);
}
+ setupDbsEtc(false, true);
HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
"org.apache.hadoop.hive.ql.security.DummyAuthenticator");
http://git-wip-us.apache.org/repos/asf/hive/blob/976e628f/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 77cfaeb..08ef998 100644
--- a/pom.xml
+++ b/pom.xml
@@ -75,6 +75,7 @@
<test.hive.hadoop.classpath>${maven.test.classpath}</test.hive.hadoop.classpath>
<test.log4j.scheme>file://</test.log4j.scheme>
<test.tmp.dir>${project.build.directory}/tmp</test.tmp.dir>
+ <test.build.dir>${project.build.directory}</test.build.dir>
<test.tmp.dir.uri>file://${test.tmp.dir}</test.tmp.dir.uri>
<test.warehouse.dir>${project.build.directory}/warehouse</test.warehouse.dir>
<test.warehouse.scheme>pfile://</test.warehouse.scheme>
@@ -1026,6 +1027,7 @@
<test.data.dir>${basedir}/${hive.path.to.root}/data/files</test.data.dir>
<test.tmp.dir>${test.tmp.dir}</test.tmp.dir>
<test.tmp.dir.uri>${test.tmp.dir.uri}</test.tmp.dir.uri>
+ <test.build.dir>${test.build.dir}</test.build.dir>
<test.dfs.mkdir>${test.dfs.mkdir}</test.dfs.mkdir>
<test.output.overwrite>${test.output.overwrite}</test.output.overwrite>
<test.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</test.warehouse.dir>
http://git-wip-us.apache.org/repos/asf/hive/blob/976e628f/ql/src/test/templates/TestCliDriver.vm
----------------------------------------------------------------------
diff --git a/ql/src/test/templates/TestCliDriver.vm b/ql/src/test/templates/TestCliDriver.vm
index 72cfab9..1961c75 100644
--- a/ql/src/test/templates/TestCliDriver.vm
+++ b/ql/src/test/templates/TestCliDriver.vm
@@ -17,23 +17,34 @@
*/
package org.apache.hadoop.hive.cli;
+import com.google.common.base.Stopwatch;
import org.apache.hadoop.hive.ql.QTestUtil;
import org.apache.hadoop.hive.ql.QTestUtil.MiniClusterType;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import java.util.concurrent.TimeUnit;
+
public class $className {
+ private static final Logger LOG = LoggerFactory.getLogger(${className}.class);
+
private static final String HIVE_ROOT = QTestUtil.ensurePathEndsInSlash(System.getProperty("hive.root"));
private static QTestUtil qt;
static {
+ Stopwatch stopwatch = new Stopwatch().start();
+ String message = "Starting TestCliDriver run at " + System.currentTimeMillis();
+ LOG.info(message);
+ System.err.println(message);
MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode");
String hiveConfDir = "$hiveConfDir";
String initScript = "$initScript";
@@ -44,15 +55,29 @@ public class $className {
if (!hiveConfDir.isEmpty()) {
hiveConfDir = HIVE_ROOT + hiveConfDir;
}
+ // TODO Is ZK startup required for TestCliDriver?
+ // TODO Is enabling LlapIo required for TestCliDriver?
qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR,
- hiveConfDir, hadoopVer, initScript, cleanupScript, useHBaseMetastore, true);
+ hiveConfDir, hadoopVer, initScript, cleanupScript, useHBaseMetastore, true, "$className");
+ message = "QTestUtil instance created. ElapsedTimeSinceStart=" + stopwatch.elapsed(
+ TimeUnit.MILLISECONDS);
+ LOG.info(message);
+ System.err.println(message);
// do a one time initialization
qt.cleanUp();
+ message = "Initialization cleanup done. ElapsedTimeSinceStart=" + stopwatch.elapsed(TimeUnit.MILLISECONDS);
+ LOG.info(message);
+ System.err.println(message);
+
qt.createSources();
+ message = "Initialization createSources done. ElapsedTimeSinceStart=" + stopwatch.elapsed(TimeUnit.MILLISECONDS);
+ LOG.info(message);
+ System.err.println(message);
} catch (Exception e) {
- System.err.println("Exception: " + e.getMessage());
+ System.err.println("Exception: " + e.getMessage() + ". ElapsedTimeSinceStart="
+ + stopwatch.elapsed(TimeUnit.MILLISECONDS));
e.printStackTrace();
System.err.flush();
fail("Unexpected exception in static initialization: "+e.getMessage());
@@ -62,6 +87,7 @@ public class $className {
@Before
public void setUp() {
try {
+ // TODO This restarts ZK for each test. Is that required?
qt.clearTestSideEffects();
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
@@ -113,7 +139,9 @@ public class $className {
private void runTest(String tname, String fname, String fpath) throws Exception {
long startTime = System.currentTimeMillis();
try {
- System.err.println("Begin query: " + fname);
+ String message = "Begin query: " + fname + ", startTime=" + startTime;
+ System.err.println(message);
+ LOG.info(message);
qt.addFile(fpath);
@@ -136,7 +164,9 @@ public class $className {
}
long elapsedTime = System.currentTimeMillis() - startTime;
- System.err.println("Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s");
+ String message = "Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s";
+ System.err.println(message);
+ LOG.info(message);
assertTrue("Test passed", true);
}
}