Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/18 04:17:11 UTC

svn commit: r1625879 - in /hive/branches/cbo: ./ common/src/java/org/apache/hadoop/hive/conf/ itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/ itests/hive-unit/src/test/java/org/apache/hive/jdbc/ packaging/ ql/src/java/org/apache/hadoop/hiv...

Author: gunther
Date: Thu Sep 18 02:17:11 2014
New Revision: 1625879

URL: http://svn.apache.org/r1625879
Log:
Merge latest trunk into cbo branch. (Gunther Hagleitner)

Added:
    hive/branches/cbo/ql/src/test/queries/clientpositive/acid_vectorization.q
      - copied unchanged from r1625875, hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization.q
    hive/branches/cbo/ql/src/test/results/clientpositive/acid_vectorization.q.out
      - copied unchanged from r1625875, hive/trunk/ql/src/test/results/clientpositive/acid_vectorization.q.out
Modified:
    hive/branches/cbo/   (props changed)
    hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/branches/cbo/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
    hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
    hive/branches/cbo/packaging/pom.xml
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java

Propchange: hive/branches/cbo/
------------------------------------------------------------------------------
  Merged /hive/trunk:r1625438-1625875

Modified: hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Sep 18 02:17:11 2014
@@ -206,7 +206,7 @@ public class HiveConf extends Configurat
         "Query plan format serialization between client and task nodes. \n" +
         "Two supported values are : kryo and javaXML. Kryo is default."),
     SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive",
-        "HDFS root scratch dir for Hive jobs which gets created with 777 permission. " +
+        "HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. " +
         "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, " +
         "with ${hive.scratch.dir.permission}."),
     LOCALSCRATCHDIR("hive.exec.local.scratchdir",

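For context on the new wording: octal 733 is rwx for the owner and wx for group and others, so any user can traverse the root scratch dir and create entries under it, but cannot list what other users put there; 777 additionally grants world read. A minimal sketch of the difference using Hadoop's FsPermission (the symbolic forms are what FsPermission.toString() prints):

    import org.apache.hadoop.fs.permission.FsPermission;

    public class ScratchDirPerms {
      public static void main(String[] args) {
        FsPermission full = new FsPermission((short) 00777);      // rwxrwxrwx
        FsPermission writeAll = new FsPermission((short) 00733);  // rwx-wx-wx
        // 733 keeps the dir world-writable (each user can still create a
        // per-user scratch subdir) while removing world read, i.e. no
        // listing of other users' scratch entries.
        System.out.println(full + " vs " + writeAll);
      }
    }
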
Modified: hive/branches/cbo/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java (original)
+++ hive/branches/cbo/itests/hive-unit/src/main/java/org/apache/hive/jdbc/miniHS2/MiniHS2.java Thu Sep 18 02:17:11 2014
@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.permission.F
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.shims.HadoopShims.MiniDFSShim;
 import org.apache.hadoop.hive.shims.HadoopShims.MiniMrShim;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -50,6 +51,7 @@ public class MiniHS2 extends AbstractHiv
   public static final String HS2_HTTP_MODE = "http";
   private static final String driverName = "org.apache.hive.jdbc.HiveDriver";
   private static final FsPermission FULL_PERM = new FsPermission((short)00777);
+  private static final FsPermission WRITE_ALL_PERM = new FsPermission((short)00733);
   private HiveServer2 hiveServer2 = null;
   private final File baseDir;
   private final Path baseDfsDir;
@@ -200,9 +202,8 @@ public class MiniHS2 extends AbstractHiv
     hiveConf.setIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT, getHttpPort());
 
     Path scratchDir = new Path(baseDfsDir, "scratch");
-
-    // Create scratchdir with 777, so that user impersonation has no issues.
-    FileSystem.mkdirs(fs, scratchDir, FULL_PERM);
+    // Create root scratchdir with write all, so that user impersonation has no issues.
+    Utilities.createDirsWithPermission(hiveConf, scratchDir, WRITE_ALL_PERM, true);
     System.setProperty(HiveConf.ConfVars.SCRATCHDIR.varname, scratchDir.toString());
     hiveConf.setVar(ConfVars.SCRATCHDIR, scratchDir.toString());
 

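The switch away from FileSystem.mkdirs matters because mkdirs applies the client-side umask (fs.permissions.umask-mode), so the directory could silently end up with fewer bits than requested. A rough sketch of what a helper like Utilities.createDirsWithPermission is expected to do at this call site (assumed semantics inferred from the call, not the verbatim Hive implementation):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    static boolean createDirsWithPermissionSketch(Configuration conf, Path dir,
        FsPermission perm, boolean recursive) throws IOException {
      Configuration copy = new Configuration(conf);
      if (recursive) {
        // Neutralize the umask so intermediate dirs also get the bits.
        copy.set("fs.permissions.umask-mode", "000");
      }
      FileSystem fs = dir.getFileSystem(copy);
      boolean created = fs.mkdirs(dir, perm);
      fs.setPermission(dir, perm); // force the exact bits on the leaf
      return created;
    }
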
Modified: hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (original)
+++ hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java Thu Sep 18 02:17:11 2014
@@ -388,7 +388,7 @@ public class TestJdbcWithMiniHS2 {
   }
 
   /**
-   * Tests the creation of the root hdfs scratch dir, which should be writable by all (777).
+   * Tests the creation of the root hdfs scratch dir, which should be writable by all.
    *
    * @throws Exception
    */
@@ -410,7 +410,7 @@ public class TestJdbcWithMiniHS2 {
     hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
     // FS
     FileSystem fs = miniHS2.getLocalFS();
-    FsPermission expectedFSPermission = new FsPermission("777");
+    FsPermission expectedFSPermission = new FsPermission((short)00733);
     // Verify scratch dir paths and permission
     // HDFS scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));

Modified: hive/branches/cbo/packaging/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/cbo/packaging/pom.xml?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/packaging/pom.xml (original)
+++ hive/branches/cbo/packaging/pom.xml Thu Sep 18 02:17:11 2014
@@ -60,33 +60,6 @@
             </executions>
           </plugin>
           <plugin>
-            <groupId>org.apache.maven.plugins</groupId>
-            <artifactId>maven-dependency-plugin</artifactId>
-            <executions>
-              <execution>
-                <id>copy</id>
-                <phase>package</phase>
-                <goals>
-                  <goal>copy</goal>
-                </goals>
-                <configuration>
-                  <artifactItems>
-                    <artifactItem>
-                      <groupId>${project.groupId}</groupId>
-                      <artifactId>hive-jdbc</artifactId>
-                      <version>${project.version}</version>
-                      <type>jar</type>
-                      <classifier>${hive.jdbc.driver.classifier}</classifier>
-                      <overWrite>true</overWrite>
-                      <outputDirectory>${project.build.directory}</outputDirectory>
-                      <destFileName>${hive.jdbc.driver.jar}</destFileName>
-                    </artifactItem>
-                  </artifactItems>
-                </configuration>
-              </execution>
-            </executions>
-          </plugin>
-          <plugin>
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>build-helper-maven-plugin</artifactId>
             <executions>
@@ -158,6 +131,12 @@
     </dependency>
     <dependency>
       <groupId>org.apache.hive</groupId>
+      <artifactId>hive-jdbc</artifactId>
+      <version>${project.version}</version>
+      <classifier>${hive.jdbc.driver.classifier}</classifier>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
       <artifactId>hive-beeline</artifactId>
       <version>${project.version}</version>
     </dependency>

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Thu Sep 18 02:17:11 2014
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.common.Obj
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
 import org.apache.hadoop.hive.ql.exec.FooterBuffer;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveRecordReader;
@@ -748,7 +749,8 @@ public class FetchOperator implements Se
    */
   private FileStatus[] listStatusUnderPath(FileSystem fs, Path p) throws IOException {
     boolean recursive = HiveConf.getBoolVar(job, HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE);
-    if (!recursive) {
+    // If this is in acid format, always read it recursively regardless of what the jobconf says.
+    if (!recursive && !AcidUtils.isAcid(p, job)) {
       return fs.listStatus(p);
     }
     List<FileStatus> results = new ArrayList<FileStatus>();

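Background for this hunk: an ACID partition stores its rows in base_N/ and delta_x_y/ subdirectories, so a flat fs.listStatus(p) would return those directories instead of data files. The recursive branch that the new condition falls through to behaves roughly like the following (a sketch; the actual method accumulates into its own results list):

    import java.io.IOException;
    import java.util.List;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    static void listRecursively(FileSystem fs, Path p, List<FileStatus> out)
        throws IOException {
      for (FileStatus stat : fs.listStatus(p)) {
        if (stat.isDirectory()) {
          listRecursively(fs, stat.getPath(), out); // descend into base_/delta_
        } else {
          out.add(stat); // an actual data file (e.g. a bucket file)
        }
      }
    }
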
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java Thu Sep 18 02:17:11 2014
@@ -141,8 +141,7 @@ public class TezJobMonitor {
           case RUNNING:
             if (!running) {
               perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
-              console.printInfo("Status: Running (application id: "
-                +dagClient.getExecutionContext()+")\n");
+              console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
               for (String s: progressMap.keySet()) {
                 perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_VERTEX + s);
               }

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java Thu Sep 18 02:17:11 2014
@@ -305,6 +305,28 @@ public class AcidUtils {
   }
 
   /**
+   * Is the given directory in ACID format?
+   * @param directory the partition directory to check
+   * @param conf the query configuration
+   * @return true, if it is an ACID directory
+   * @throws IOException
+   */
+  public static boolean isAcid(Path directory,
+                               Configuration conf) throws IOException {
+    FileSystem fs = directory.getFileSystem(conf);
+    for(FileStatus file: fs.listStatus(directory)) {
+      String filename = file.getPath().getName();
+      if (filename.startsWith(BASE_PREFIX) ||
+          filename.startsWith(DELTA_PREFIX)) {
+        if (file.isDirectory()) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
    * Get the ACID state of the given directory. It finds the minimal set of
    * base and diff directories. Note that because major compactions don't
    * preserve the history, we can't use a base directory that includes a

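For readers outside AcidUtils: BASE_PREFIX and DELTA_PREFIX are the "base_" and "delta_" directory-name prefixes used for compacted and transactional data. Illustrative (hypothetical) layouts and what isAcid returns for them:

    // /warehouse/t/p=1/base_0000005/bucket_00000          -> isAcid: true
    // /warehouse/t/p=1/delta_0000006_0000006/bucket_00000 -> isAcid: true
    // /warehouse/t/p=1/000000_0                           -> isAcid: false
    // Note the isDirectory() guard: a plain file that happens to be named
    // "base_..." or "delta_..." does not mark the directory as ACID.
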
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu Sep 18 02:17:11 2014
@@ -77,6 +77,7 @@ import com.google.common.collect.Compari
 class RecordReaderImpl implements RecordReader {
 
   private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
+  private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
 
   private final FSDataInputStream file;
   private final long firstRow;
@@ -3133,9 +3134,9 @@ class RecordReaderImpl implements Record
     // find the next row
     rowInStripe += 1;
     advanceToNextRow(rowInStripe + rowBaseInStripe);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("row from " + reader.path);
-      LOG.debug("orc row = " + result);
+    if (isLogTraceEnabled) {
+      LOG.trace("row from " + reader.path);
+      LOG.trace("orc row = " + result);
     }
     return result;
   }

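The point of hoisting the check into a static final field is that the JIT can treat the per-row guard as a constant and eliminate both the branch and the string concatenation on this very hot path. The trade-off is worth noting (general pattern, not Hive-specific):

    // The flag is read once at class-initialization time, so enabling TRACE
    // at runtime (e.g. via a log4j reconfiguration) will not start emitting
    // these per-row messages until the class is reloaded.
    private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
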
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Sep 18 02:17:11 2014
@@ -5982,7 +5982,7 @@ public class SemanticAnalyzer extends Ba
       if (!isNonNativeTable) {
         AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass());
         if (acidOp != AcidUtils.Operation.NOT_ACID) {
-          checkIfAcidAndOverwriting(qb, table_desc);
+          checkAcidConstraints(qb, table_desc);
         }
         ltd = new LoadTableDesc(queryTmpdir,table_desc, dpCtx, acidOp);
         ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
@@ -6089,7 +6089,7 @@ public class SemanticAnalyzer extends Ba
           dest_part.isStoredAsSubDirectories(), conf);
       AcidUtils.Operation acidOp = getAcidType(table_desc.getOutputFileFormatClass());
       if (acidOp != AcidUtils.Operation.NOT_ACID) {
-        checkIfAcidAndOverwriting(qb, table_desc);
+        checkAcidConstraints(qb, table_desc);
       }
       ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp);
       ltd.setReplace(!qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(),
@@ -6349,15 +6349,19 @@ public class SemanticAnalyzer extends Ba
     return output;
   }
 
-  // Check if we are overwriting any tables.  If so, throw an exception as that is not allowed
-  // when using an Acid compliant txn manager and operating on an acid table.
-  private void checkIfAcidAndOverwriting(QB qb, TableDesc tableDesc) throws SemanticException {
+  // Check constraints on acid tables.  This includes
+  // * no insert overwrites
+  // * no use of vectorization
+  private void checkAcidConstraints(QB qb, TableDesc tableDesc) throws SemanticException {
     String tableName = tableDesc.getTableName();
     if (!qb.getParseInfo().isInsertIntoTable(tableName)) {
       LOG.debug("Couldn't find table " + tableName + " in insertIntoTable");
       throw new SemanticException(ErrorMsg.NO_INSERT_OVERWRITE_WITH_ACID.getMsg());
     }
-
+    if (conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+      LOG.info("Turning off vectorization for acid write operation");
+      conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED, false);
+    }
   }
 
   /**

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1625879&r1=1625878&r2=1625879&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Thu Sep 18 02:17:11 2014
@@ -515,16 +515,17 @@ public class SessionState {
    */
   private Path createRootHDFSDir(HiveConf conf) throws IOException {
     Path rootHDFSDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));
-    FsPermission expectedHDFSDirPermission = new FsPermission("777");
+    FsPermission writableHDFSDirPermission = new FsPermission((short)00733);
     FileSystem fs = rootHDFSDirPath.getFileSystem(conf);
     if (!fs.exists(rootHDFSDirPath)) {
-      Utilities.createDirsWithPermission(conf, rootHDFSDirPath, expectedHDFSDirPermission, true);
+      Utilities.createDirsWithPermission(conf, rootHDFSDirPath, writableHDFSDirPermission, true);
     }
     FsPermission currentHDFSDirPermission = fs.getFileStatus(rootHDFSDirPath).getPermission();
     LOG.debug("HDFS root scratch dir: " + rootHDFSDirPath + ", permission: "
         + currentHDFSDirPermission);
-    // If the root HDFS scratch dir already exists, make sure the permissions are 777.
-    if (!expectedHDFSDirPermission.equals(fs.getFileStatus(rootHDFSDirPath).getPermission())) {
+    // If the root HDFS scratch dir already exists, make sure it is writable.
+    if (!((currentHDFSDirPermission.toShort() & writableHDFSDirPermission
+        .toShort()) == writableHDFSDirPermission.toShort())) {
       throw new RuntimeException("The root scratch dir: " + rootHDFSDirPath
           + " on HDFS should be writable. Current permissions are: " + currentHDFSDirPermission);
     }
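
A worked example of the relaxed check: instead of demanding exactly 777, the directory is accepted whenever its permission bits cover 00733, i.e. at least write+execute for group and others (the sketch below just restates the mask arithmetic):

    // Accepted iff every bit of 00733 is present in the actual permission.
    static boolean coversWriteAll(short actual) {
      short required = (short) 00733;
      return (actual & required) == required;
    }
    // coversWriteAll((short) 00777) -> true   (777 still passes)
    // coversWriteAll((short) 00755) -> false  (0755 & 0733 == 0711: group/others lack write)
    // coversWriteAll((short) 01733) -> true   (a sticky bit does not interfere)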