You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/04/20 00:25:34 UTC

[42/58] [abbrv] hive git commit: HIVE-13496. Create initial test data once across multiple test runs - TestCliDriver. (Siddharth Seth, reviewed by Ashutosh Chauhan)

HIVE-13496. Create initial test data once across multiple test runs - TestCliDriver. (Siddharth Seth, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/976e628f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/976e628f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/976e628f

Branch: refs/heads/llap
Commit: 976e628fc01911936caa19e61ea3342f3a19455a
Parents: 0dd4621
Author: Siddharth Seth <ss...@apache.org>
Authored: Thu Apr 14 10:25:53 2016 -0700
Committer: Siddharth Seth <ss...@apache.org>
Committed: Thu Apr 14 10:25:53 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/QTestUtil.java    | 247 +++++++++++++++++--
 pom.xml                                         |   2 +
 ql/src/test/templates/TestCliDriver.vm          |  38 ++-
 3 files changed, 265 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/976e628f/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
index 2f109ab..79646cd 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java
@@ -51,7 +51,6 @@ import java.util.Collection;
 import java.util.Comparator;
 import java.util.Deque;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -63,6 +62,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Preconditions;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
@@ -84,9 +84,7 @@ import org.apache.hadoop.hive.common.io.SortPrintStream;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.llap.LlapItUtils;
-import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration;
 import org.apache.hadoop.hive.llap.daemon.MiniLlapCluster;
-import org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon;
 import org.apache.hadoop.hive.llap.io.api.LlapProxy;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.Index;
@@ -99,6 +97,7 @@ import org.apache.hadoop.hive.ql.exec.tez.TezSessionState;
 import org.apache.hadoop.hive.ql.lockmgr.zookeeper.CuratorFrameworkSingleton;
 import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager;
 import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
@@ -139,6 +138,8 @@ public class QTestUtil {
   // security property names
   private static final String SECURITY_KEY_PROVIDER_URI_NAME = "dfs.encryption.key.provider.uri";
   private static final String CRLF = System.getProperty("line.separator");
+  private static final String TEST_BUILD_DIR = System.getProperty("test.build.dir");
+  private static final String CACHED_DATA_DIR_NAME = "cachedData";
 
   private static final Logger LOG = LoggerFactory.getLogger("QTestUtil");
   private static final String QTEST_LEAVE_FILES = "QTEST_LEAVE_FILES";
@@ -183,6 +184,16 @@ public class QTestUtil {
   private final String cleanupScript;
   private boolean useHBaseMetastore = false;
 
+  // Parameters which help tracking cached data generation.
+  private final String driverName;
+  private Path cachedDataPath;
+  private String metaStorePathString;
+  private Path metaStorePath;
+  private FileSystem localFs;
+  private boolean attemptingCacheUsage;
+
+  private boolean dbEtcSetup = false;
+
   public interface SuiteAddTestFunctor {
     public void addTestToSuite(TestSuite suite, Object setup, String tName);
   }
@@ -378,11 +389,34 @@ public class QTestUtil {
   }
 
   public QTestUtil(String outDir, String logDir, MiniClusterType clusterType,
+                   String confDir, String hadoopVer, String initScript, String cleanupScript,
+                   boolean useHBaseMetastore, boolean withLlapIo) throws Exception {
+    // For now, to avoid changing multiple test templates, a null driver name disables
+    // the cached test-data optimization.
+    this(outDir, logDir, clusterType, confDir, hadoopVer, initScript, cleanupScript,
+        useHBaseMetastore, withLlapIo, null);
+  }
+
+  public QTestUtil(String outDir, String logDir, MiniClusterType clusterType,
       String confDir, String hadoopVer, String initScript, String cleanupScript,
-      boolean useHBaseMetastore, boolean withLlapIo)
+      boolean useHBaseMetastore, boolean withLlapIo, String driverName)
     throws Exception {
+    this.attemptingCacheUsage = (StringUtils.isEmpty(TEST_BUILD_DIR) ||
+        StringUtils.isEmpty(driverName) || useHBaseMetastore) ? false : true;
+    this.driverName = driverName;
     this.outDir = outDir;
     this.logDir = logDir;
+    LOG.info("Creating QTestUtil with settings: "
+        + "driverName=" + driverName
+        + ", attemptingCacheUsage=" + attemptingCacheUsage
+        + ", test.build.dir=" + System.getProperty("test.build.dir")
+        + ", useHbaseMetaStore=" + useHBaseMetastore
+        + ", withLlapIo=" + withLlapIo
+        + ", confDir=" + confDir
+        + ", outDir=" + outDir
+        + ", logDir=" + logDir
+        + ", initScript=" + initScript
+        + ", cleanupScript=" + cleanupScript);
     this.useHBaseMetastore = useHBaseMetastore;
 
     if (confDir != null && !confDir.isEmpty()) {
@@ -471,6 +505,7 @@ public class QTestUtil {
     if (scriptsDir == null) {
       scriptsDir = new File(".").getAbsolutePath() + "/data/scripts";
     }
+    LOG.info("Using DataDir=" + dataDir + ", ScriptsDir=" + scriptsDir);
 
     this.initScript = scriptsDir + File.separator + initScript;
     this.cleanupScript = scriptsDir + File.separator + cleanupScript;
@@ -832,6 +867,17 @@ public class QTestUtil {
       return;
     }
 
+    if (!attemptingCacheUsage) {
+      cleanupNonCacheUsage();
+    } else {
+      cleanupCacheUsage();
+    }
+
+    FunctionRegistry.unregisterTemporaryUDF("test_udaf");
+    FunctionRegistry.unregisterTemporaryUDF("test_error");
+  }
+
+  private void cleanupNonCacheUsage() throws Exception {
     clearTablesCreatedDuringTests();
     clearKeysCreatedInTests();
 
@@ -849,21 +895,42 @@ public class QTestUtil {
       LOG.info("No cleanup script detected. Skipping.");
     }
 
+    cleanupWarehouseDir();
+  }
+
+  private void cleanupCacheUsage() throws IOException {
+    // Remove the Warehouse and metastore directories completely.
+    // Also close the current db, since files are going to come in to replace it soon.
+    Preconditions.checkState(attemptingCacheUsage);
+    Preconditions.checkNotNull(metaStorePath);
+    Preconditions.checkNotNull(localFs);
+    Hive.closeCurrent();
+    cleanupMetastoreDir();
+    cleanupWarehouseDir();
+  }
+
+  private void cleanupWarehouseDir() throws IOException {
     // delete any contents in the warehouse dir
     Path p = new Path(testWarehouse);
     FileSystem fs = p.getFileSystem(conf);
 
     try {
-      FileStatus [] ls = fs.listStatus(p);
-      for (int i=0; (ls != null) && (i<ls.length); i++) {
+      FileStatus[] ls = fs.listStatus(p);
+      for (int i = 0; (ls != null) && (i < ls.length); i++) {
         fs.delete(ls[i].getPath(), true);
       }
     } catch (FileNotFoundException e) {
       // Best effort
     }
+  }
 
-    FunctionRegistry.unregisterTemporaryUDF("test_udaf");
-    FunctionRegistry.unregisterTemporaryUDF("test_error");
+  private void cleanupMetastoreDir() throws IOException {
+    try {
+      LOG.info("Cleaning up metastore Dir: {}", metaStorePath);
+      localFs.delete(metaStorePath, true);
+    } catch (FileNotFoundException e) {
+      // Best effort
+    }
   }
 
   protected void runCreateTableCmd(String createTableCmd) throws Exception {
@@ -893,6 +960,10 @@ public class QTestUtil {
   }
 
   public void createSources(String tname) throws Exception {
+    createSources(tname, false);
+  }
+
+  public void createSources(String tname, boolean forceCreate) throws Exception {
     boolean canReuseSession = (tname == null) || !qNoSessionReuseQuerySet.contains(tname);
     if(!isSessionStateStarted) {
       startSessionState(canReuseSession);
@@ -901,34 +972,173 @@ public class QTestUtil {
     if(cliDriver == null) {
       cliDriver = new CliDriver();
     }
-    cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+
     File scriptFile = new File(this.initScript);
     if (!scriptFile.isFile()) {
       LOG.info("No init script detected. Skipping");
+      if (attemptingCacheUsage) {
+        setupDbsEtc(true, true);
+      }
       return;
     }
-    conf.setBoolean("hive.test.init.phase", true);
 
+    if (!attemptingCacheUsage || forceCreate) {
+      LOG.info("Creating sources without data caching. attemptingCacheUsage={}, forceCreate={}",
+          attemptingCacheUsage, forceCreate);
+      cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+      conf.setBoolean("hive.test.init.phase", true);
+      createSourcesNonCached(scriptFile);
+    } else {
+      LOG.info("Creating sources with data caching");
+      createSourcesCached(scriptFile);
+    }
+
+    conf.setBoolean("hive.test.init.phase", false);
+  }
+
+  private void createSourcesNonCached(File scriptFile) throws IOException {
     String initCommands = readEntireFileIntoString(scriptFile);
     LOG.info("Initial setup (" + initScript + "):\n" + initCommands);
 
     cliDriver.processLine(initCommands);
+  }
 
-    conf.setBoolean("hive.test.init.phase", false);
+  private void createSourcesCached(File scriptFile) throws IOException, HiveException {
+
+    // First check if the cache already exists. If it does just copy it over.
+    Path cachedWarehousePath = new Path(cachedDataPath, "warehouse");
+    Path cachedMetaStorePtah = new Path(cachedDataPath, "metastore");
+    if (localFs.exists(cachedDataPath)) {
+      if (localFs.exists(cachedWarehousePath) && localFs.exists(cachedMetaStorePtah)) {
+        LOG.info("Cached data found in {}. Attempting to use it", cachedDataPath);
+        // Data is already cached.
+        // Copy the files over to where they should be
+        Path warehousePath = new Path(testWarehouse);
+        FileSystem warehouseFs = warehousePath.getFileSystem(conf);
+        try {
+          warehouseFs.delete(warehousePath, false);
+        } catch (FileNotFoundException e) {
+          // Does not matter if it does not exist.
+        }
+        warehouseFs.copyFromLocalFile(false, cachedWarehousePath, warehousePath);
+
+        try {
+          localFs.delete(metaStorePath, false);
+        } catch (IOException e) {
+          // Does not matter if it does not exist.
+        }
+        localFs.copyFromLocalFile(false, cachedMetaStorePtah, metaStorePath);
+        setupDbsEtc(true, false);
+        cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+        conf.setBoolean("hive.test.init.phase", true);
+
+        return;
+      } else {
+        // Something is missing. Cleanup. Re-generate and cache
+        LOG.info("Partial or no cached data found at {}. Cache will be created", cachedDataPath);
+        localFs.delete(cachedDataPath, true);
+      }
+    } else {
+      LOG.info("No cached data found at {}. Cache will be created", cachedDataPath);
+      // Nothing is cached yet. Generate the data and cache it.
+    }
+
+    // Generate and cache the data
+    setupDbsEtc(true, true);
+    cliDriver.processLine("set test.data.dir=" + testFiles + ";");
+    conf.setBoolean("hive.test.init.phase", true);
+    createSourcesNonCached(scriptFile);
+
+    // Close the DB so that contents can be copied out safely.
+    Hive.closeCurrent();
+
+    // Cache the sources
+    localFs.mkdirs(cachedDataPath);
+
+    Path warehousePath = new Path(testWarehouse);
+    FileSystem warehouseFs = warehousePath.getFileSystem(conf);
+
+    warehouseFs.copyToLocalFile(false, warehousePath, cachedWarehousePath, true);
+    localFs.copyToLocalFile(false, metaStorePath, cachedMetaStorePtah, true);
+
+    // Re-open the DB etc.
+    setupDbsEtc(true, false);
   }
 
-  public void init() throws Exception {
+  private static final Pattern metaStoreUriPattern =
+      Pattern.compile("derby.*?databaseName=(.*?)(;|$)");
 
+  private String getDerbyDbPath(String jdbcConnectString) {
+    if (StringUtils.isEmpty(jdbcConnectString)) {
+      return null;
+    }
+    Matcher matcher = metaStoreUriPattern.matcher(jdbcConnectString);
+    if (matcher.find()) {
+      return matcher.group(1);
+    } else {
+      return null;
+    }
+  }
+
+  public void init() throws Exception {
+    LOG.info("init");
     testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
+    LOG.info("TestWarehouseDir set to: [{}]", testWarehouse);
+    if (attemptingCacheUsage) {
+      // The derby path comes from METASTORECONNECTURLKEY. Default ends up being target/junit_metastore_db
+      String metaStoreConnectUrl = conf.getVar(ConfVars.METASTORECONNECTURLKEY);
+      LOG.info("MetastoreConnectUrl: " + metaStoreConnectUrl);
+      metaStorePathString = getDerbyDbPath(metaStoreConnectUrl);
+
+      if (metaStorePathString == null) {
+        LOG.warn(
+            "Disabling attempted cache usage since metastore path cannot be determined from {}",
+            metaStoreConnectUrl);
+        attemptingCacheUsage = false;
+      } else {
+        LOG.info("Metastore url path: " + metaStorePathString);
+        metaStorePath = new Path(metaStorePathString);
+        if (metaStorePath.isAbsolute() && metaStorePathString.split(File.separator).length >= 3) {
+          // Turn this on only if the path is absolute, and is at least 3 deep - since we'll be deleting files later.
+          localFs = FileSystem.getLocal(conf).getRaw();
+          assert(TEST_BUILD_DIR != null);
+          cachedDataPath = new Path(TEST_BUILD_DIR, CACHED_DATA_DIR_NAME);
+          cachedDataPath = new Path(cachedDataPath, driverName);
+          LOG.info("Using cachedDataPath: " + cachedDataPath);
+        } else {
+          LOG.warn(
+              "Disableing attempted cache usage since metastore path may not be absolute, or depth is < 3. MetaStorePath={}",
+              metaStorePathString);
+          metaStorePath = null;
+          attemptingCacheUsage = false;
+        }
+
+      }
+    }
     String execEngine = conf.get("hive.execution.engine");
     conf.set("hive.execution.engine", "mr");
     SessionState.start(conf);
     conf.set("hive.execution.engine", execEngine);
-    db = Hive.get(conf);
-    drv = new Driver(conf);
-    drv.init();
-    pd = new ParseDriver();
-    sem = new SemanticAnalyzer(conf);
+
+    if (!attemptingCacheUsage) {
+      setupDbsEtc(true, true);
+    }
+  }
+
+  private void setupDbsEtc(boolean force, boolean isNewDb) throws HiveException {
+    if (!dbEtcSetup || force) {
+      if (isNewDb) {
+        db = Hive.get(conf);
+      } else {
+        db = Hive.getWithFastCheck(conf, false);
+      }
+      LOG.info("Obtained db");
+      drv = new Driver(conf);
+      drv.init();
+      pd = new ParseDriver();
+      sem = new SemanticAnalyzer(conf);
+      dbEtcSetup = true;
+    }
   }
 
   public void init(String tname) throws Exception {
@@ -944,8 +1154,9 @@ public class QTestUtil {
   public String cliInit(String tname, boolean recreate) throws Exception {
     if (recreate) {
       cleanUp(tname);
-      createSources(tname);
+      createSources(tname, true);
     }
+    setupDbsEtc(false, true);
 
     HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
     "org.apache.hadoop.hive.ql.security.DummyAuthenticator");

http://git-wip-us.apache.org/repos/asf/hive/blob/976e628f/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 77cfaeb..08ef998 100644
--- a/pom.xml
+++ b/pom.xml
@@ -75,6 +75,7 @@
     <test.hive.hadoop.classpath>${maven.test.classpath}</test.hive.hadoop.classpath>
     <test.log4j.scheme>file://</test.log4j.scheme>
     <test.tmp.dir>${project.build.directory}/tmp</test.tmp.dir>
+    <test.build.dir>${project.build.directory}</test.build.dir>
     <test.tmp.dir.uri>file://${test.tmp.dir}</test.tmp.dir.uri>
     <test.warehouse.dir>${project.build.directory}/warehouse</test.warehouse.dir>
     <test.warehouse.scheme>pfile://</test.warehouse.scheme>
@@ -1026,6 +1027,7 @@
             <test.data.dir>${basedir}/${hive.path.to.root}/data/files</test.data.dir>
             <test.tmp.dir>${test.tmp.dir}</test.tmp.dir>
             <test.tmp.dir.uri>${test.tmp.dir.uri}</test.tmp.dir.uri>
+            <test.build.dir>${test.build.dir}</test.build.dir>
             <test.dfs.mkdir>${test.dfs.mkdir}</test.dfs.mkdir>
             <test.output.overwrite>${test.output.overwrite}</test.output.overwrite>
             <test.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</test.warehouse.dir>

http://git-wip-us.apache.org/repos/asf/hive/blob/976e628f/ql/src/test/templates/TestCliDriver.vm
----------------------------------------------------------------------
diff --git a/ql/src/test/templates/TestCliDriver.vm b/ql/src/test/templates/TestCliDriver.vm
index 72cfab9..1961c75 100644
--- a/ql/src/test/templates/TestCliDriver.vm
+++ b/ql/src/test/templates/TestCliDriver.vm
@@ -17,23 +17,34 @@
  */
 package org.apache.hadoop.hive.cli;
 
+import com.google.common.base.Stopwatch;
 import org.apache.hadoop.hive.ql.QTestUtil;
 import org.apache.hadoop.hive.ql.QTestUtil.MiniClusterType;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.util.concurrent.TimeUnit;
+
 public class $className {
 
+  private static final Logger LOG = LoggerFactory.getLogger(${className}.class);
+
   private static final String HIVE_ROOT = QTestUtil.ensurePathEndsInSlash(System.getProperty("hive.root"));
   private static QTestUtil qt;
 
   static {
 
+    Stopwatch stopwatch = new Stopwatch().start();
+    String message = "Starting TestCliDriver run at " + System.currentTimeMillis();
+    LOG.info(message);
+    System.err.println(message);
     MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode");
     String hiveConfDir = "$hiveConfDir";
     String initScript = "$initScript";
@@ -44,15 +55,29 @@ public class $className {
       if (!hiveConfDir.isEmpty()) {
         hiveConfDir = HIVE_ROOT + hiveConfDir;
       }
+      // TODO Is ZK startup required for TestCliDriver?
+      // TODO Is enabling LlapIo required for TestCliDriver?
       qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR,
-      hiveConfDir, hadoopVer, initScript, cleanupScript, useHBaseMetastore, true);
+      hiveConfDir, hadoopVer, initScript, cleanupScript, useHBaseMetastore, true, "$className");
+      message = "QTestUtil instance created. ElapsedTimeSinceStart=" + stopwatch.elapsed(
+          TimeUnit.MILLISECONDS);
+      LOG.info(message);
+      System.err.println(message);
 
       // do a one time initialization
       qt.cleanUp();
+      message = "Initialization cleanup done. ElapsedTimeSinceStart=" + stopwatch.elapsed(TimeUnit.MILLISECONDS);
+      LOG.info(message);
+      System.err.println(message);
+
       qt.createSources();
+      message = "Initialization createSources done. ElapsedTimeSinceStart=" + stopwatch.elapsed(TimeUnit.MILLISECONDS);
+      LOG.info(message);
+      System.err.println(message);
 
     } catch (Exception e) {
-      System.err.println("Exception: " + e.getMessage());
+      System.err.println("Exception: " + e.getMessage() + ". ElapsedTimeSinceStart="
+          + stopwatch.elapsed(TimeUnit.MILLISECONDS));
       e.printStackTrace();
       System.err.flush();
       fail("Unexpected exception in static initialization: "+e.getMessage());
@@ -62,6 +87,7 @@ public class $className {
   @Before
   public void setUp() {
     try {
+      // TODO This restarts ZK for each test. Is that required?
       qt.clearTestSideEffects();
     } catch (Exception e) {
       System.err.println("Exception: " + e.getMessage());
@@ -113,7 +139,9 @@ public class $className {
   private void runTest(String tname, String fname, String fpath) throws Exception {
     long startTime = System.currentTimeMillis();
     try {
-      System.err.println("Begin query: " + fname);
+      String message = "Begin query: " + fname + ", startTime=" + startTime;
+      System.err.println(message);
+      LOG.info(message);
 
       qt.addFile(fpath);
 
@@ -136,7 +164,9 @@ public class $className {
     }
 
     long elapsedTime = System.currentTimeMillis() - startTime;
-    System.err.println("Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s");
+    String message = "Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s";
+    System.err.println(message);
+    LOG.info(message);
     assertTrue("Test passed", true);
   }
 }