Posted to commits@hive.apache.org by se...@apache.org on 2018/05/22 19:14:26 UTC
[1/5] hive git commit: HIVE-18866 : Semijoin and analyze: Implement a
Long -> Hash64 vector fast-path (Gopal Vijayaraghavan, Sergey Shelukhin,
reviewed by Prasanth Jayachandran)
Repository: hive
Updated Branches:
refs/heads/master df12aec50 -> ffb7e043e
HIVE-18866 : Semijoin and analyze: Implement a Long -> Hash64 vector fast-path (Gopal Vijayaraghavan, Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5df1eb3f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5df1eb3f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5df1eb3f
Branch: refs/heads/master
Commit: 5df1eb3f16111bbf70ae8d17b915a4bffb67594b
Parents: 43e2f96
Author: sergey <se...@apache.org>
Authored: Tue May 22 12:06:12 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue May 22 12:14:11 2018 -0700
----------------------------------------------------------------------
itests/hive-jmh/pom.xml | 5 +
.../hive/benchmark/hash/Murmur3Bench.java | 107 +++++++++++++++++++
.../hadoop/hive/common/ndv/hll/HyperLogLog.java | 21 ++--
.../apache/hive/common/util/BloomKFilter.java | 4 +-
.../org/apache/hive/common/util/Murmur3.java | 46 ++++++++
.../apache/hive/common/util/TestMurmur3.java | 29 ++++-
6 files changed, 193 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
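For context (a minimal sketch, not part of the patch): the new Long fast-path must be bit-identical to hashing the value's big-endian byte encoding, which is what the ByteBuffer-based callers did before and what the new test64 asserts. Assuming hive-storage-api on the classpath:

    import java.nio.ByteBuffer;
    import org.apache.hive.common.util.Murmur3;

    public class Hash64Equivalence {
      public static void main(String[] args) {
        long v = 123456789L;
        ByteBuffer buf = ByteBuffer.allocate(Long.BYTES); // big-endian by default
        buf.putLong(0, v);
        // The fast-path skips the byte[] round-trip but must match it exactly;
        // internally it uses Long.reverseBytes to emulate the little-endian read
        // that hash64(byte[]) performs on the big-endian array.
        System.out.println(Murmur3.hash64(buf.array()) == Murmur3.hash64(v)); // true
      }
    }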
http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/itests/hive-jmh/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/pom.xml b/itests/hive-jmh/pom.xml
index c0a6564..5eb3026 100644
--- a/itests/hive-jmh/pom.xml
+++ b/itests/hive-jmh/pom.xml
@@ -65,6 +65,11 @@
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
+ <artifactId>hive-storage-api</artifactId>
+ <version>2.7.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${project.version}</version>
</dependency>
http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
new file mode 100644
index 0000000..cd85148
--- /dev/null
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/hash/Murmur3Bench.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.benchmark.hash;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
+import org.apache.hive.common.util.Murmur3;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * This test measures the performance of Murmur3 hashing.
+ * <p/>
+ * This test uses JMH framework for benchmarking.
+ * You may execute this benchmark tool using the JMH command line in different ways:
+ * <p/>
+ * To use the settings shown in the main() function, use:
+ * $ java -cp target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ * <p/>
+ * To use the default settings used by JMH, use:
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ * <p/>
+ * To specify different parameters, use:
+ * - This command will use 10 warm-up iterations, 5 test iterations, and 2 forks. And it will
+ * display the Average Time (avgt) in Microseconds (us)
+ * - Benchmark mode. Available modes are:
+ * [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all]
+ * - Output time unit. Available time units are: [m, s, ms, us, ns].
+ * <p/>
+ * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.hash.Murmur3Bench
+ * -wi 10 -i 5 -f 2 -bm avgt -tu us
+ */
+@State(Scope.Benchmark)
+public class Murmur3Bench {
+ @BenchmarkMode(Mode.AverageTime)
+ @Fork(1)
+ @State(Scope.Thread)
+ @OutputTimeUnit(TimeUnit.MILLISECONDS)
+ public static class Hash64Bench {
+
+ @Param({ "-1"}) //"123456789", "987654321", "1234", "4321",
+ long v;
+
+
+
+ @Benchmark
+ @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+ public long longHash() {
+ long k = 0;
+ for (int i = 0; i < 4096; i++) {
+ k += Murmur3.hash64(v);
+ }
+ return k;
+ }
+
+ @Benchmark
+ @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.SECONDS)
+ @Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
+ public long longBytesHash() {
+ ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
+ long k = 0;
+ for (int i = 0; i < 4096; i++) {
+ LONG_BUFFER.putLong(0, v+i);
+ k += Murmur3.hash64(LONG_BUFFER.array());
+ }
+ return k;
+ }
+ }
+
+ public static void main(String[] args) throws RunnerException {
+ Options opt = new OptionsBuilder().include(".*" + Murmur3Bench.class.getSimpleName() +
+ ".*").build();
+ new Runner(opt).run();
+ }
+}
\ No newline at end of file
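The benchmark methods above return the accumulated sum so the JIT cannot dead-code-eliminate the hash calls. An equivalent pattern (a sketch only; class name hypothetical, not part of the patch) uses JMH's Blackhole as the sink:

    import java.util.concurrent.TimeUnit;

    import org.apache.hive.common.util.Murmur3;
    import org.openjdk.jmh.annotations.Benchmark;
    import org.openjdk.jmh.annotations.BenchmarkMode;
    import org.openjdk.jmh.annotations.Mode;
    import org.openjdk.jmh.annotations.OutputTimeUnit;
    import org.openjdk.jmh.annotations.Param;
    import org.openjdk.jmh.annotations.Scope;
    import org.openjdk.jmh.annotations.State;
    import org.openjdk.jmh.infra.Blackhole;

    @State(Scope.Thread)
    public class Hash64BlackholeSketch {
      @Param({"-1"})
      long v;

      @Benchmark
      @BenchmarkMode(Mode.AverageTime)
      @OutputTimeUnit(TimeUnit.MILLISECONDS)
      public void longHash(Blackhole bh) {
        for (int i = 0; i < 4096; i++) {
          bh.consume(Murmur3.hash64(v)); // each result is sunk, so the call cannot be eliminated
        }
      }
    }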
http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index 8bdb47b..07a93c6 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -62,9 +62,6 @@ public class HyperLogLog implements NumDistinctValueEstimator {
private final static int DEFAULT_HASH_BITS = 64;
private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0});
private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1});
- private final static ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES);
- private final static ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES);
- private final static ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
public enum EncodingType {
SPARSE, DENSE
@@ -212,33 +209,27 @@ public class HyperLogLog implements NumDistinctValueEstimator {
}
public void addShort(short val) {
- SHORT_BUFFER.putShort(0, val);
- add(Murmur3.hash64(SHORT_BUFFER.array()));
+ add(Murmur3.hash64(val));
}
public void addInt(int val) {
- INT_BUFFER.putInt(0, val);
- add(Murmur3.hash64(INT_BUFFER.array()));
+ add(Murmur3.hash64(val));
}
public void addLong(long val) {
- LONG_BUFFER.putLong(0, val);
- add(Murmur3.hash64(LONG_BUFFER.array()));
+ add(Murmur3.hash64(val));
}
public void addFloat(float val) {
- INT_BUFFER.putFloat(0, val);
- add(Murmur3.hash64(INT_BUFFER.array()));
+ add(Murmur3.hash64(Float.floatToIntBits(val)));
}
public void addDouble(double val) {
- LONG_BUFFER.putDouble(0, val);
- add(Murmur3.hash64(LONG_BUFFER.array()));
+ add(Murmur3.hash64(Double.doubleToLongBits(val)));
}
public void addChar(char val) {
- SHORT_BUFFER.putChar(0, val);
- add(Murmur3.hash64(SHORT_BUFFER.array()));
+ add(Murmur3.hash64((short)val));
}
/**
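The float and double overloads above reuse the integer fast-paths by hashing the IEEE-754 bit patterns; for non-NaN values these are exactly the bytes the old putFloat/putDouble calls wrote. Dropping the shared static buffers also removes a data race between concurrent add* calls. A minimal check mirroring the new test64 (a sketch, not part of the patch):

    import java.nio.ByteBuffer;
    import org.apache.hive.common.util.Murmur3;

    public class FloatBitsEquivalence {
      public static void main(String[] args) {
        float f = 1.5f;
        ByteBuffer buf = ByteBuffer.allocate(Integer.BYTES); // big-endian by default
        buf.putFloat(0, f); // for non-NaN values, same bytes as Float.floatToIntBits(f)
        System.out.println(Murmur3.hash64(buf.array())
            == Murmur3.hash64(Float.floatToIntBits(f))); // true
      }
    }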
http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 6ccf5ab..5b1914d 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -156,7 +156,7 @@ public class BloomKFilter {
public void addLong(long val) {
// puts long in little endian order
- addBytes(longToByteArrayLE(val));
+ addHash(Murmur3.hash64(val));
}
public void addFloat(float val) {
@@ -239,7 +239,7 @@ public class BloomKFilter {
}
public boolean testLong(long val) {
- return testBytes(longToByteArrayLE(val));
+ return testHash(Murmur3.hash64(val));
}
public boolean testFloat(float val) {
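A minimal usage sketch of the changed BloomKFilter methods (assuming the expected-entries constructor; not part of the patch). addLong/testLong now hash the long directly instead of allocating a little-endian byte[] per call:

    import org.apache.hive.common.util.BloomKFilter;

    public class BloomLongSketch {
      public static void main(String[] args) {
        BloomKFilter bloom = new BloomKFilter(10_000); // expected number of entries
        bloom.addLong(42L);  // hashes via Murmur3.hash64(long), no per-call allocation
        System.out.println(bloom.testLong(42L)); // true; Bloom filters have no false negatives
      }
    }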
http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
index c896fa7..8aae28b 100644
--- a/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
+++ b/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
@@ -155,6 +155,52 @@ public class Murmur3 {
return hash64(data, 0, data.length, DEFAULT_SEED);
}
+ public static long hash64(long data) {
+ long hash = DEFAULT_SEED;
+ long k = Long.reverseBytes(data);
+ int length = Long.BYTES;
+ // mix functions
+ k *= C1;
+ k = Long.rotateLeft(k, R1);
+ k *= C2;
+ hash ^= k;
+ hash = Long.rotateLeft(hash, R2) * M + N1;
+ // finalization
+ hash ^= length;
+ hash = fmix64(hash);
+ return hash;
+ }
+
+ public static long hash64(int data) {
+ long k1 = Integer.reverseBytes(data) & (-1L >>> 32);
+ int length = Integer.BYTES;
+ long hash = DEFAULT_SEED;
+ k1 *= C1;
+ k1 = Long.rotateLeft(k1, R1);
+ k1 *= C2;
+ hash ^= k1;
+ // finalization
+ hash ^= length;
+ hash = fmix64(hash);
+ return hash;
+ }
+
+ public static long hash64(short data) {
+ long hash = DEFAULT_SEED;
+ long k1 = 0;
+ k1 ^= ((long) data & 0xff) << 8;
+ k1 ^= ((long)((data & 0xFF00) >> 8) & 0xff);
+ k1 *= C1;
+ k1 = Long.rotateLeft(k1, R1);
+ k1 *= C2;
+ hash ^= k1;
+
+ // finalization
+ hash ^= Short.BYTES;
+ hash = fmix64(hash);
+ return hash;
+ }
+
public static long hash64(byte[] data, int offset, int length) {
return hash64(data, offset, length, DEFAULT_SEED);
}
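The manual byte swap in hash64(short) reproduces how the byte-array path folds a two-byte big-endian tail little-endian: k1 = (low byte << 8) | high byte. A sketch of the equivalence (not part of the patch):

    import java.nio.ByteBuffer;
    import org.apache.hive.common.util.Murmur3;

    public class ShortSwapEquivalence {
      public static void main(String[] args) {
        short s = 0x1234;
        ByteBuffer buf = ByteBuffer.allocate(Short.BYTES);
        buf.putShort(0, s); // array = {0x12, 0x34}: big-endian, high byte first
        // hash64(byte[]) folds the 2-byte tail as (array[1] << 8) | array[0] = 0x3412,
        // which is exactly what the two XOR lines in hash64(short) build.
        System.out.println(Murmur3.hash64(buf.array()) == Murmur3.hash64(s)); // true
      }
    }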
http://git-wip-us.apache.org/repos/asf/hive/blob/5df1eb3f/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java b/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
index f20366b..16955c1 100644
--- a/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
+++ b/storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java
@@ -18,7 +18,7 @@
package org.apache.hive.common.util;
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
import org.apache.hive.common.util.Murmur3.IncrementalHash32;
import com.google.common.hash.HashFunction;
@@ -222,7 +222,32 @@ public class TestMurmur3 {
assertEquals(gl2, m2);
}
}
-
+
+ @Test
+ public void test64() {
+ final int seed = 123, iters = 1000000;
+ ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES);
+ ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES);
+ ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES);
+ Random rdm = new Random(seed);
+ for (int i = 0; i < iters; ++i) {
+ long ln = rdm.nextLong();
+ int in = rdm.nextInt();
+ short sn = (short) (rdm.nextInt(2* Short.MAX_VALUE - 1) - Short.MAX_VALUE);
+ float fn = rdm.nextFloat();
+ double dn = rdm.nextDouble();
+ SHORT_BUFFER.putShort(0, sn);
+ assertEquals(Murmur3.hash64(SHORT_BUFFER.array()), Murmur3.hash64(sn));
+ INT_BUFFER.putInt(0, in);
+ assertEquals(Murmur3.hash64(INT_BUFFER.array()), Murmur3.hash64(in));
+ LONG_BUFFER.putLong(0, ln);
+ assertEquals(Murmur3.hash64(LONG_BUFFER.array()), Murmur3.hash64(ln));
+ INT_BUFFER.putFloat(0, fn);
+ assertEquals(Murmur3.hash64(INT_BUFFER.array()), Murmur3.hash64(Float.floatToIntBits(fn)));
+ LONG_BUFFER.putDouble(0, dn);
+ assertEquals(Murmur3.hash64(LONG_BUFFER.array()), Murmur3.hash64(Double.doubleToLongBits(dn)));
+ }
+ }
@Test
public void testIncremental() {
[2/5] hive git commit: HIVE-19628 : possible NPE in LLAP testSigning
(Sergey Shelukhin, reviewed by Jason Dere)
Posted by se...@apache.org.
HIVE-19628 : possible NPE in LLAP testSigning (Sergey Shelukhin, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/43e2f963
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/43e2f963
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/43e2f963
Branch: refs/heads/master
Commit: 43e2f9632130d569c93ebeeb11297813ca34b80c
Parents: f4352e5
Author: sergey <se...@apache.org>
Authored: Tue May 22 12:03:35 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue May 22 12:14:11 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/llap/security/TestLlapSignerImpl.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/43e2f963/llap-server/src/test/org/apache/hadoop/hive/llap/security/TestLlapSignerImpl.java
----------------------------------------------------------------------
diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/security/TestLlapSignerImpl.java b/llap-server/src/test/org/apache/hadoop/hive/llap/security/TestLlapSignerImpl.java
index 19a90c7..6be6836 100644
--- a/llap-server/src/test/org/apache/hadoop/hive/llap/security/TestLlapSignerImpl.java
+++ b/llap-server/src/test/org/apache/hadoop/hive/llap/security/TestLlapSignerImpl.java
@@ -106,9 +106,9 @@ public class TestLlapSignerImpl {
private FakeSecretManager rollKey(FakeSecretManager fsm, int idToPreserve) throws IOException {
// Adding keys is PITA - there's no way to plug into timed rolling; just create a new fsm.
DelegationKey dk = fsm.getDelegationKey(idToPreserve), curDk = fsm.getCurrentKey();
- if (curDk.getKeyId() != idToPreserve) {
+ if (curDk == null || curDk.getKeyId() != idToPreserve) {
LOG.warn("The current key is not the one we expect; key rolled in background? Signed with "
- + idToPreserve + " but got " + curDk.getKeyId());
+ + idToPreserve + " but got " + (curDk == null ? "null" : curDk.getKeyId()));
}
// Regardless of the above, we should have the key we've signed with.
assertNotNull(dk);
[4/5] hive git commit: HIVE-19258 : add originals support to MM
tables (and make the conversion a metadata only operation) (Sergey Shelukhin,
reviewed by Jason Dere)
Posted by se...@apache.org.
HIVE-19258 : add originals support to MM tables (and make the conversion a metadata only operation) (Sergey Shelukhin, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f4352e53
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f4352e53
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f4352e53
Branch: refs/heads/master
Commit: f4352e5339694d290b1a146feb2577d4f96d14eb
Parents: 37e86e9
Author: sergey <se...@apache.org>
Authored: Tue May 22 12:02:46 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue May 22 12:14:11 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../hive/ql/txn/compactor/TestCompactor.java | 96 +++++-
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 17 +-
.../hadoop/hive/ql/exec/FetchOperator.java | 89 ++++--
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 108 ++++---
.../hive/ql/io/BucketizedHiveInputFormat.java | 39 ++-
.../hadoop/hive/ql/io/HiveInputFormat.java | 215 +++++++------
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 43 ++-
.../ql/parse/repl/dump/io/FileOperations.java | 81 ++---
.../ql/plan/ConditionalResolverMergeFiles.java | 2 +-
.../hive/ql/txn/compactor/CompactorMR.java | 9 +-
.../apache/hadoop/hive/ql/TestTxnCommands.java | 4 +-
.../hadoop/hive/ql/TxnCommandsBaseForTests.java | 4 +
.../queries/clientpositive/mm_conversions.q | 40 ++-
.../clientpositive/llap/mm_conversions.q.out | 307 ++++++++++++++++++-
15 files changed, 825 insertions(+), 233 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index c14caf6..931533a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2392,6 +2392,10 @@ public class HiveConf extends Configuration {
HIVE_LOCK_QUERY_STRING_MAX_LENGTH("hive.lock.query.string.max.length", 1000000,
"The maximum length of the query string to store in the lock.\n" +
"The default value is 1000000, since the data limit of a znode is 1MB"),
+ HIVE_MM_ALLOW_ORIGINALS("hive.mm.allow.originals", false,
+ "Whether to allow original files in MM tables. Conversion to MM may be expensive if\n" +
+ "this is set to false, however unless MAPREDUCE-7086 fix is present, queries that\n" +
+ "read MM tables with original files will fail. The default in Hive 3.0 is false."),
// Zookeeper related configs
HIVE_ZOOKEEPER_QUORUM("hive.zookeeper.quorum", "",
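For reference, the new flag can be set programmatically, as the TestCompactor change below does, or per session. A minimal sketch:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class AllowOriginalsConfig {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Keep pre-existing files as "originals" on conversion to insert-only (MM):
        HiveConf.setBoolVar(conf, ConfVars.HIVE_MM_ALLOW_ORIGINALS, true);
        // Equivalently, in a Hive session: SET hive.mm.allow.originals=true;
      }
    }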
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index de61d71..4a0e834 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -154,6 +155,7 @@ public class TestCompactor {
TxnDbUtil.prepDb(hiveConf);
conf = hiveConf;
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_MM_ALLOW_ORIGINALS, true);
msClient = new HiveMetaStoreClient(conf);
driver = DriverFactory.newDriver(hiveConf);
SessionState.start(new CliSessionState(hiveConf));
@@ -984,6 +986,96 @@ public class TestCompactor {
}
@Test
+ public void mmTableOriginalsOrc() throws Exception {
+ mmTableOriginals("ORC");
+ }
+
+ @Test
+ public void mmTableOriginalsText() throws Exception {
+ mmTableOriginals("TEXTFILE");
+ }
+
+ private void mmTableOriginals(String format) throws Exception {
+ // Originals split won't work due to the MAPREDUCE-7086 issue in FileInputFormat.
+ boolean isBrokenUntilMapreduce7086 = "TEXTFILE".equals(format);
+ String dbName = "default";
+ String tblName = "mm_nonpart";
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) STORED AS " +
+ format + " TBLPROPERTIES ('transactional'='false')", driver);
+ IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
+ Table table = msClient.getTable(dbName, tblName);
+
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName +" (a,b) VALUES(2, 'bar')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) SELECT a,b FROM "
+ + tblName + " UNION ALL SELECT a,b FROM " + tblName, driver);
+
+ verifyFooBarResult(tblName, 3);
+
+ FileSystem fs = FileSystem.get(conf);
+ executeStatementOnDriver("ALTER TABLE " + tblName + " SET TBLPROPERTIES "
+ + "('transactional'='true', 'transactional_properties'='insert_only')", driver);
+
+ verifyFooBarResult(tblName, 3);
+
+ runMajorCompaction(dbName, tblName);
+ verifyFooBarResult(tblName, 3);
+ verifyHasBase(table.getSd(), fs, "base_0000001");
+
+ // Try with an extra delta.
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) STORED AS " +
+ format + " TBLPROPERTIES ('transactional'='false')", driver);
+ table = msClient.getTable(dbName, tblName);
+
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName +" (a,b) VALUES(2, 'bar')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) SELECT a,b FROM "
+ + tblName + " UNION ALL SELECT a,b FROM " + tblName, driver);
+ verifyFooBarResult(tblName, 3);
+
+ executeStatementOnDriver("ALTER TABLE " + tblName + " SET TBLPROPERTIES "
+ + "('transactional'='true', 'transactional_properties'='insert_only')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) SELECT a,b FROM "
+ + tblName + " UNION ALL SELECT a,b FROM " + tblName, driver);
+
+ // Neither select nor compaction (which is a select) will work after this.
+ if (isBrokenUntilMapreduce7086) return;
+
+ verifyFooBarResult(tblName, 9);
+
+ runMajorCompaction(dbName, tblName);
+ verifyFooBarResult(tblName, 9);
+ verifyHasBase(table.getSd(), fs, "base_0000002");
+
+ // Try with an extra base.
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) STORED AS " +
+ format + " TBLPROPERTIES ('transactional'='false')", driver);
+ table = msClient.getTable(dbName, tblName);
+
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName +" (a,b) VALUES(2, 'bar')", driver);
+ executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) SELECT a,b FROM "
+ + tblName + " UNION ALL SELECT a,b FROM " + tblName, driver);
+ verifyFooBarResult(tblName, 3);
+
+ executeStatementOnDriver("ALTER TABLE " + tblName + " SET TBLPROPERTIES "
+ + "('transactional'='true', 'transactional_properties'='insert_only')", driver);
+ executeStatementOnDriver("INSERT OVERWRITE TABLE " + tblName + " SELECT a,b FROM "
+ + tblName + " UNION ALL SELECT a,b FROM " + tblName, driver);
+ verifyFooBarResult(tblName, 6);
+
+ runMajorCompaction(dbName, tblName);
+ verifyFooBarResult(tblName, 6);
+ verifyHasBase(table.getSd(), fs, "base_0000002");
+
+ msClient.close();
+ }
+
+
+ @Test
public void mmTableBucketed() throws Exception {
String dbName = "default";
String tblName = "mm_nonpart";
@@ -1054,7 +1146,7 @@ public class TestCompactor {
msClient.abortTxns(Lists.newArrayList(openTxnId)); // Now abort 3.
runMajorCompaction(dbName, tblName); // Compact 4 and 5.
verifyFooBarResult(tblName, 2);
- verifyHasBase(table.getSd(), fs, "base_0000005");
+ verifyHasBase(table.getSd(), fs, "base_0000005");
runCleaner(conf);
verifyDeltaCount(table.getSd(), fs, 0);
}
@@ -1108,7 +1200,7 @@ public class TestCompactor {
p2 = msClient.getPartition(dbName, tblName, "ds=2"),
p3 = msClient.getPartition(dbName, tblName, "ds=3");
msClient.close();
-
+
FileSystem fs = FileSystem.get(conf);
verifyDeltaCount(p1.getSd(), fs, 3);
verifyDeltaCount(p2.getSd(), fs, 2);
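The new mmTableOriginals tests above boil down to this flow (condensed; table name hypothetical, using the test's executeStatementOnDriver helper):

    // Write to a plain (non-transactional) table first.
    executeStatementOnDriver("CREATE TABLE t (a INT, b STRING) STORED AS ORC "
        + "TBLPROPERTIES ('transactional'='false')", driver);
    executeStatementOnDriver("INSERT INTO t VALUES (1, 'foo')", driver);
    // With hive.mm.allow.originals=true this ALTER is metadata-only: the files written
    // above stay in place and are read as "originals" rather than being rewritten
    // into a delta directory.
    executeStatementOnDriver("ALTER TABLE t SET TBLPROPERTIES "
        + "('transactional'='true', 'transactional_properties'='insert_only')", driver);
    // A later major compaction folds originals and deltas into a base_N directory.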
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 63fe8ad..16d0854 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -4478,7 +4478,22 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
Boolean isToMmTable = AcidUtils.isToInsertOnlyTable(tbl, alterTbl.getProps());
if (isToMmTable != null) {
if (!isFromMmTable && isToMmTable) {
- result = generateAddMmTasks(tbl, alterTbl.getWriteId());
+ if (!HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_ALLOW_ORIGINALS)) {
+ result = generateAddMmTasks(tbl, alterTbl.getWriteId());
+ } else {
+ if (tbl.getPartitionKeys().size() > 0) {
+ Hive db = getHive();
+ PartitionIterable parts = new PartitionIterable(db, tbl, null,
+ HiveConf.getIntVar(conf, ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
+ Iterator<Partition> partIter = parts.iterator();
+ while (partIter.hasNext()) {
+ Partition part0 = partIter.next();
+ checkMmLb(part0);
+ }
+ } else {
+ checkMmLb(tbl);
+ }
+ }
} else if (isFromMmTable && !isToMmTable) {
throw new HiveException("Cannot convert an ACID table to non-ACID");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 969c591..2246901 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -379,34 +379,59 @@ public class FetchOperator implements Serializable {
Class<? extends InputFormat> formatter = currDesc.getInputFileFormatClass();
Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job);
InputFormat inputFormat = getInputFormatFromCache(formatter, job);
- String inputs = processCurrPathForMmWriteIds(inputFormat);
- if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
- Utilities.FILE_OP_LOGGER.trace("Setting fetch inputs to " + inputs);
+ List<Path> dirs = new ArrayList<>(), dirsWithOriginals = new ArrayList<>();
+ processCurrPathForMmWriteIds(inputFormat, dirs, dirsWithOriginals);
+ if (dirs.isEmpty() && dirsWithOriginals.isEmpty()) {
+ LOG.debug("No valid directories for " + currPath);
+ continue;
}
- if (inputs == null) return null;
- job.set("mapred.input.dir", inputs);
- InputSplit[] splits = inputFormat.getSplits(job, 1);
- FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length];
- for (int i = 0; i < splits.length; i++) {
- inputSplits[i] = new FetchInputFormatSplit(splits[i], inputFormat);
+ List<FetchInputFormatSplit> inputSplits = new ArrayList<>();
+ if (!dirs.isEmpty()) {
+ String inputs = makeInputString(dirs);
+ Utilities.FILE_OP_LOGGER.trace("Setting fetch inputs to {}", inputs);
+ job.set("mapred.input.dir", inputs);
+
+ generateWrappedSplits(inputFormat, inputSplits, job);
+ }
+
+ if (!dirsWithOriginals.isEmpty()) {
+ String inputs = makeInputString(dirsWithOriginals);
+ Utilities.FILE_OP_LOGGER.trace("Setting originals fetch inputs to {}", inputs);
+ JobConf jobNoRec = HiveInputFormat.createConfForMmOriginalsSplit(job, dirsWithOriginals);
+ jobNoRec.set("mapred.input.dir", inputs);
+ generateWrappedSplits(inputFormat, inputSplits, jobNoRec);
}
+
if (work.getSplitSample() != null) {
inputSplits = splitSampling(work.getSplitSample(), inputSplits);
}
- if (inputSplits.length > 0) {
- if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_IN_TEST)) {
- Arrays.sort(inputSplits, new FetchInputFormatSplitComparator());
- }
- return inputSplits;
+
+ if (inputSplits.isEmpty()) {
+ LOG.debug("No splits for " + currPath);
+ continue;
+ }
+ if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_IN_TEST)) {
+ Collections.sort(inputSplits, new FetchInputFormatSplitComparator());
}
+ return inputSplits.toArray(new FetchInputFormatSplit[inputSplits.size()]);
}
+
return null;
}
- private String processCurrPathForMmWriteIds(InputFormat inputFormat) throws IOException {
+ private void generateWrappedSplits(InputFormat inputFormat,
+ List<FetchInputFormatSplit> inputSplits, JobConf job) throws IOException {
+ InputSplit[] splits = inputFormat.getSplits(job, 1);
+ for (int i = 0; i < splits.length; i++) {
+ inputSplits.add(new FetchInputFormatSplit(splits[i], inputFormat));
+ }
+ }
+
+ private void processCurrPathForMmWriteIds(InputFormat inputFormat,
+ List<Path> dirs, List<Path> dirsWithOriginals) throws IOException {
if (inputFormat instanceof HiveInputFormat) {
- return StringUtils.escapeString(currPath.toString()); // No need to process here.
+ dirs.add(currPath); // No need to process here.
}
ValidWriteIdList validWriteIdList;
if (AcidUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) {
@@ -418,17 +443,19 @@ public class FetchOperator implements Serializable {
Utilities.FILE_OP_LOGGER.info("Processing " + currDesc.getTableName() + " for MM paths");
}
- Path[] dirs = HiveInputFormat.processPathsForMmRead(Lists.newArrayList(currPath), job, validWriteIdList);
- if (dirs == null || dirs.length == 0) {
- return null; // No valid inputs. This condition is logged inside the call.
- }
- StringBuffer str = new StringBuffer(StringUtils.escapeString(dirs[0].toString()));
- for(int i = 1; i < dirs.length;i++) {
- str.append(",").append(StringUtils.escapeString(dirs[i].toString()));
+ HiveInputFormat.processPathsForMmRead(
+ Lists.newArrayList(currPath), job, validWriteIdList, dirs, dirsWithOriginals);
+ }
+
+ private String makeInputString(List<Path> dirs) {
+ if (dirs == null || dirs.isEmpty()) return "";
+ StringBuffer str = new StringBuffer(StringUtils.escapeString(dirs.get(0).toString()));
+ for(int i = 1; i < dirs.size(); i++) {
+ str.append(",").append(StringUtils.escapeString(dirs.get(i).toString()));
}
return str.toString();
- }
+ }
private ValidWriteIdList extractValidWriteIdList() {
if (currDesc.getTableName() == null || !org.apache.commons.lang.StringUtils.isBlank(currDesc.getTableName())) {
String txnString = job.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
@@ -438,18 +465,18 @@ public class FetchOperator implements Serializable {
return null; // not fetching from a table directly but from a temp location
}
- private FetchInputFormatSplit[] splitSampling(SplitSample splitSample,
- FetchInputFormatSplit[] splits) {
+ private List<FetchInputFormatSplit> splitSampling(SplitSample splitSample,
+ List<FetchInputFormatSplit> splits) {
long totalSize = 0;
for (FetchInputFormatSplit split: splits) {
totalSize += split.getLength();
}
- List<FetchInputFormatSplit> result = new ArrayList<FetchInputFormatSplit>(splits.length);
+ List<FetchInputFormatSplit> result = new ArrayList<FetchInputFormatSplit>(splits.size());
long targetSize = splitSample.getTargetSize(totalSize);
- int startIndex = splitSample.getSeedNum() % splits.length;
+ int startIndex = splitSample.getSeedNum() % splits.size();
long size = 0;
- for (int i = 0; i < splits.length; i++) {
- FetchInputFormatSplit split = splits[(startIndex + i) % splits.length];
+ for (int i = 0; i < splits.size(); i++) {
+ FetchInputFormatSplit split = splits.get((startIndex + i) % splits.size());
result.add(split);
long splitgLength = split.getLength();
if (size + splitgLength >= targetSize) {
@@ -460,7 +487,7 @@ public class FetchOperator implements Serializable {
}
size += splitgLength;
}
- return result.toArray(new FetchInputFormatSplit[result.size()]);
+ return result;
}
/**
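makeInputString above rebuilds the standard mapred.input.dir encoding: a comma-separated list in which each path is escaped so embedded commas survive. A short sketch (paths hypothetical):

    import org.apache.hadoop.util.StringUtils;

    String inputs = StringUtils.escapeString("/warehouse/t/base_0000001") + ","
        + StringUtils.escapeString("/warehouse/t/delta_0000002_0000002");
    // job.set("mapred.input.dir", inputs);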
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 183515a..10f7bd2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.hive.metastore.api.DataOperationType;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.AcidUtils.ParsedDelta;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
@@ -149,6 +150,7 @@ public class AcidUtils {
public static final int MAX_STATEMENTS_PER_TXN = 10000;
public static final Pattern BUCKET_DIGIT_PATTERN = Pattern.compile("[0-9]{5}$");
public static final Pattern LEGACY_BUCKET_DIGIT_PATTERN = Pattern.compile("^[0-9]{6}");
+
/**
* A write into a non-acid table produces files like 0000_0 or 0000_0_copy_1
* (Unless via Load Data statement)
@@ -380,6 +382,57 @@ public class AcidUtils {
}
return result;
}
+
+ public static final class DirectoryImpl implements Directory {
+ private final List<FileStatus> abortedDirectories;
+ private final boolean isBaseInRawFormat;
+ private final List<HdfsFileStatusWithId> original;
+ private final List<FileStatus> obsolete;
+ private final List<ParsedDelta> deltas;
+ private final Path base;
+
+ public DirectoryImpl(List<FileStatus> abortedDirectories,
+ boolean isBaseInRawFormat, List<HdfsFileStatusWithId> original,
+ List<FileStatus> obsolete, List<ParsedDelta> deltas, Path base) {
+ this.abortedDirectories = abortedDirectories;
+ this.isBaseInRawFormat = isBaseInRawFormat;
+ this.original = original;
+ this.obsolete = obsolete;
+ this.deltas = deltas;
+ this.base = base;
+ }
+
+ @Override
+ public Path getBaseDirectory() {
+ return base;
+ }
+
+ @Override
+ public boolean isBaseInRawFormat() {
+ return isBaseInRawFormat;
+ }
+
+ @Override
+ public List<HdfsFileStatusWithId> getOriginalFiles() {
+ return original;
+ }
+
+ @Override
+ public List<ParsedDelta> getCurrentDirectories() {
+ return deltas;
+ }
+
+ @Override
+ public List<FileStatus> getObsolete() {
+ return obsolete;
+ }
+
+ @Override
+ public List<FileStatus> getAbortedDirectories() {
+ return abortedDirectories;
+ }
+ }
+
//This is used for (full) Acid tables. InsertOnly uses NOT_ACID
public enum Operation implements Serializable {
NOT_ACID, INSERT, UPDATE, DELETE;
@@ -974,7 +1027,7 @@ public class AcidUtils {
// Okay, we're going to need these originals. Recurse through them and figure out what we
// really need.
for (FileStatus origDir : originalDirectories) {
- findOriginals(fs, origDir, original, useFileIds, ignoreEmptyFiles);
+ findOriginals(fs, origDir, original, useFileIds, ignoreEmptyFiles, true);
}
}
@@ -1046,7 +1099,7 @@ public class AcidUtils {
* If this sort order is changed and there are tables that have been converted to transactional
* and have had any update/delete/merge operations performed but not yet MAJOR compacted, it
* may result in data loss since it may change how
- * {@link org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair} assigns
+ * {@link org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair} assigns
* {@link RecordIdentifier#rowId} for read (that have happened) and compaction (yet to happen).
*/
Collections.sort(original, (HdfsFileStatusWithId o1, HdfsFileStatusWithId o2) -> {
@@ -1056,37 +1109,8 @@ public class AcidUtils {
// Note: isRawFormat is invalid for non-ORC tables. It will always return true, so we're good.
final boolean isBaseInRawFormat = base != null && MetaDataFile.isRawFormat(base, fs);
- return new Directory() {
-
- @Override
- public Path getBaseDirectory() {
- return base;
- }
- @Override
- public boolean isBaseInRawFormat() {
- return isBaseInRawFormat;
- }
-
- @Override
- public List<HdfsFileStatusWithId> getOriginalFiles() {
- return original;
- }
-
- @Override
- public List<ParsedDelta> getCurrentDirectories() {
- return deltas;
- }
-
- @Override
- public List<FileStatus> getObsolete() {
- return obsolete;
- }
-
- @Override
- public List<FileStatus> getAbortedDirectories() {
- return abortedDirectories;
- }
- };
+ return new DirectoryImpl(abortedDirectories, isBaseInRawFormat, original,
+ obsolete, deltas, base);
}
/**
* We can only use a 'base' if it doesn't have an open txn (from specific reader's point of view)
@@ -1198,8 +1222,9 @@ public class AcidUtils {
* @param original the list of original files
* @throws IOException
*/
- private static void findOriginals(FileSystem fs, FileStatus stat,
- List<HdfsFileStatusWithId> original, Ref<Boolean> useFileIds, boolean ignoreEmptyFiles) throws IOException {
+ public static void findOriginals(FileSystem fs, FileStatus stat,
+ List<HdfsFileStatusWithId> original, Ref<Boolean> useFileIds,
+ boolean ignoreEmptyFiles, boolean recursive) throws IOException {
assert stat.isDir();
List<HdfsFileStatusWithId> childrenWithId = null;
Boolean val = useFileIds.value;
@@ -1218,8 +1243,10 @@ public class AcidUtils {
}
if (childrenWithId != null) {
for (HdfsFileStatusWithId child : childrenWithId) {
- if (child.getFileStatus().isDir()) {
- findOriginals(fs, child.getFileStatus(), original, useFileIds, ignoreEmptyFiles);
+ if (child.getFileStatus().isDirectory()) {
+ if (recursive) {
+ findOriginals(fs, child.getFileStatus(), original, useFileIds, ignoreEmptyFiles, true);
+ }
} else {
if(!ignoreEmptyFiles || child.getFileStatus().getLen() > 0) {
original.add(child);
@@ -1230,7 +1257,9 @@ public class AcidUtils {
List<FileStatus> children = HdfsUtils.listLocatedStatus(fs, stat.getPath(), hiddenFileFilter);
for (FileStatus child : children) {
if (child.isDir()) {
- findOriginals(fs, child, original, useFileIds, ignoreEmptyFiles);
+ if (recursive) {
+ findOriginals(fs, child, original, useFileIds, ignoreEmptyFiles, true);
+ }
} else {
if(!ignoreEmptyFiles || child.getLen() > 0) {
original.add(createOriginalObj(null, child));
@@ -1240,6 +1269,7 @@ public class AcidUtils {
}
}
+
public static boolean isTablePropertyTransactional(Properties props) {
String resultStr = props.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
if (resultStr == null) {
@@ -1807,7 +1837,7 @@ public class AcidUtils {
return null;
}
Directory acidInfo = AcidUtils.getAcidState(dir, jc, idList);
- // Assume that for an MM table, or if there's only the base directory, we are good.
+ // Assume that for an MM table, or if there's only the base directory, we are good.
if (!acidInfo.getCurrentDirectories().isEmpty() && AcidUtils.isFullAcidTable(table)) {
Utilities.FILE_OP_LOGGER.warn(
"Computing stats for an ACID table; stats may be inaccurate");
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
index 75fa09d..5d20931 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
@@ -18,21 +18,16 @@
package org.apache.hadoop.hive.ql.io;
-import org.apache.curator.shaded.com.google.common.collect.Lists;
-
-import org.apache.hadoop.hive.common.ValidWriteIdList;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
@@ -42,6 +37,10 @@ import org.apache.hadoop.mapred.InvalidInputException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
/**
BucketizedHiveInputFormat serves a similar function to HiveInputFormat but
@@ -139,21 +138,39 @@ public class BucketizedHiveInputFormat<K extends WritableComparable, V extends W
mmIds = getMmValidWriteIds(newjob, part.getTableDesc(), null);
}
// TODO: should this also handle ACID operation, etc.? seems to miss a lot of stuff from HIF.
- Path[] finalDirs = (mmIds == null) ? new Path[] { dir }
- : processPathsForMmRead(Lists.newArrayList(dir), newjob, mmIds);
- if (finalDirs == null) {
+ List<Path> finalDirs = null, dirsWithMmOriginals = null;
+ if (mmIds == null) {
+ finalDirs = Lists.newArrayList(dir);
+ } else {
+ finalDirs = new ArrayList<>();
+ dirsWithMmOriginals = new ArrayList<>();
+ processPathsForMmRead(
+ Lists.newArrayList(dir), newjob, mmIds, finalDirs, dirsWithMmOriginals);
+ }
+ if (finalDirs.isEmpty() && (dirsWithMmOriginals == null || dirsWithMmOriginals.isEmpty())) {
continue; // No valid inputs - possible in MM case.
}
for (Path finalDir : finalDirs) {
FileStatus[] listStatus = listStatus(newjob, finalDir);
-
for (FileStatus status : listStatus) {
numOrigSplits = addBHISplit(
status, inputFormat, inputFormatClass, numOrigSplits, newjob, result);
}
}
+ if (dirsWithMmOriginals != null) {
+ for (Path originalsDir : dirsWithMmOriginals) {
+ FileSystem fs = originalsDir.getFileSystem(job);
+ FileStatus[] listStatus = fs.listStatus(originalsDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
+ for (FileStatus status : listStatus) {
+ if (status.isDirectory()) continue;
+ numOrigSplits = addBHISplit(
+ status, inputFormat, inputFormatClass, numOrigSplits, newjob, result);
+ }
+ }
+ }
}
+
LOG.info(result.size() + " bucketized splits generated from "
+ numOrigSplits + " original splits.");
return result.toArray(new BucketizedHiveInputSplit[result.size()]);
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 3d965c0..bcc0508 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -29,35 +29,30 @@ import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
-import java.util.Map.Entry;
-
-import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.JavaUtils;
-import org.apache.hadoop.hive.common.StringInternUtils;
-import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
-import org.apache.hadoop.hive.common.ValidWriteIdList;
-import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
-import org.apache.hive.common.util.Ref;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.StringInternUtils;
+import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
import org.apache.hadoop.hive.llap.io.api.LlapIo;
import org.apache.hadoop.hive.llap.io.api.LlapProxy;
-import org.apache.hadoop.hive.ql.exec.spark.SparkDynamicPartitionPruner;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.SparkDynamicPartitionPruner;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -65,6 +60,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;
import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorMapOperatorReadType;
@@ -82,7 +78,10 @@ import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hive.common.util.Ref;
import org.apache.hive.common.util.ReflectionUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* HiveInputFormat is a parameterized InputFormat which looks at the path name
@@ -488,45 +487,72 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
pushFilters(conf, tableScan, this.mrwork);
}
- Path[] finalDirs = processPathsForMmRead(dirs, conf, validMmWriteIdList);
- if (finalDirs == null) {
+ List<Path> dirsWithFileOriginals = new ArrayList<>(), finalDirs = new ArrayList<>();
+ processPathsForMmRead(dirs, conf, validMmWriteIdList, finalDirs, dirsWithFileOriginals);
+ if (finalDirs.isEmpty() && dirsWithFileOriginals.isEmpty()) {
// This is for transactional tables.
if (!conf.getBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, false)) {
LOG.warn("No valid inputs found in " + dirs);
- return; // No valid inputs.
} else if (validMmWriteIdList != null) {
// AcidUtils.getAcidState() is already called to verify there is no input split.
// Thus for a GroupByOperator summary row, set finalDirs and add a Dummy split here.
- finalDirs = dirs.toArray(new Path[dirs.size()]);
- result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(finalDirs[0].toString()),
- ZeroRowsInputFormat.class.getName()));
+ result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(
+ dirs.get(0).toString()), ZeroRowsInputFormat.class.getName()));
}
- } else {
- FileInputFormat.setInputPaths(conf, finalDirs);
- conf.setInputFormat(inputFormat.getClass());
-
- int headerCount = 0;
- int footerCount = 0;
- if (table != null) {
- headerCount = Utilities.getHeaderCount(table);
- footerCount = Utilities.getFooterCount(table, conf);
- if (headerCount != 0 || footerCount != 0) {
- // Input file has header or footer, cannot be splitted.
- HiveConf.setLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, Long.MAX_VALUE);
- }
+ return; // No valid inputs.
+ }
+
+ conf.setInputFormat(inputFormat.getClass());
+ int headerCount = 0;
+ int footerCount = 0;
+ if (table != null) {
+ headerCount = Utilities.getHeaderCount(table);
+ footerCount = Utilities.getFooterCount(table, conf);
+ if (headerCount != 0 || footerCount != 0) {
+ // Input file has header or footer, cannot be split.
+ HiveConf.setLongVar(conf, ConfVars.MAPREDMINSPLITSIZE, Long.MAX_VALUE);
}
+ }
+ if (!finalDirs.isEmpty()) {
+ FileInputFormat.setInputPaths(conf, finalDirs.toArray(new Path[finalDirs.size()]));
InputSplit[] iss = inputFormat.getSplits(conf, splits);
for (InputSplit is : iss) {
result.add(new HiveInputSplit(is, inputFormatClass.getName()));
}
- if (iss.length == 0 && finalDirs.length > 0 && conf.getBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, false)) {
- // If there are no inputs; the Execution engine skips the operator tree.
- // To prevent it from happening; an opaque ZeroRows input is added here - when needed.
- result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(finalDirs[0].toString()),
- ZeroRowsInputFormat.class.getName()));
+ }
+
+ if (!dirsWithFileOriginals.isEmpty()) {
+ // We are going to add splits for these directories with recursive = false, so we ignore
+ // any subdirectories (deltas or original directories) and only read the original files.
+ // The fact that there's a loop calling addSplitsForGroup already implies it's ok to
+ // call the real input format multiple times... however some split concurrency/etc configs
+ // that are applied separately in each call will effectively be ignored for such splits.
+ JobConf nonRecConf = createConfForMmOriginalsSplit(conf, dirsWithFileOriginals);
+ InputSplit[] iss = inputFormat.getSplits(nonRecConf, splits);
+ for (InputSplit is : iss) {
+ result.add(new HiveInputSplit(is, inputFormatClass.getName()));
}
}
+
+ if (result.isEmpty() && conf.getBoolean(Utilities.ENSURE_OPERATORS_EXECUTED, false)) {
+ // If there are no inputs, the execution engine skips the operator tree.
+ // To prevent that, an opaque ZeroRows input is added here when needed.
+ result.add(new HiveInputSplit(new NullRowsInputFormat.DummyInputSplit(
+ finalDirs.get(0).toString()), ZeroRowsInputFormat.class.getName()));
+ }
+ }
+
+ public static JobConf createConfForMmOriginalsSplit(
+ JobConf conf, List<Path> dirsWithFileOriginals) {
+ JobConf nonRecConf = new JobConf(conf);
+ FileInputFormat.setInputPaths(nonRecConf,
+ dirsWithFileOriginals.toArray(new Path[dirsWithFileOriginals.size()]));
+ nonRecConf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, false);
+ nonRecConf.setBoolean("mapred.input.dir.recursive", false);
+ // TODO: change to FileInputFormat.... field after MAPREDUCE-7086.
+ nonRecConf.setBoolean("mapreduce.input.fileinputformat.input.dir.nonrecursive.ignore.subdirs", true);
+ return nonRecConf;
}
protected ValidWriteIdList getMmValidWriteIds(
@@ -543,71 +569,84 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
return validWriteIdList;
}
- public static Path[] processPathsForMmRead(List<Path> dirs, JobConf conf,
- ValidWriteIdList validWriteIdList) throws IOException {
- if (validWriteIdList == null) {
- return dirs.toArray(new Path[dirs.size()]);
- } else {
- List<Path> finalPaths = new ArrayList<>(dirs.size());
- for (Path dir : dirs) {
- processForWriteIds(dir, conf, validWriteIdList, finalPaths);
- }
- if (finalPaths.isEmpty()) {
- return null;
- }
- return finalPaths.toArray(new Path[finalPaths.size()]);
+ public static void processPathsForMmRead(List<Path> dirs, Configuration conf,
+ ValidWriteIdList validWriteIdList, List<Path> finalPaths,
+ List<Path> pathsWithFileOriginals) throws IOException {
+ if (validWriteIdList == null) {
+ finalPaths.addAll(dirs);
+ return;
+ }
+ boolean allowOriginals = HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_ALLOW_ORIGINALS);
+ for (Path dir : dirs) {
+ processForWriteIds(
+ dir, conf, validWriteIdList, allowOriginals, finalPaths, pathsWithFileOriginals);
}
}
- private static void processForWriteIds(Path dir, JobConf conf,
- ValidWriteIdList validWriteIdList, List<Path> finalPaths) throws IOException {
+ private static void processForWriteIds(Path dir, Configuration conf,
+ ValidWriteIdList validWriteIdList, boolean allowOriginals, List<Path> finalPaths,
+ List<Path> pathsWithFileOriginals) throws IOException {
FileSystem fs = dir.getFileSystem(conf);
- if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
- Utilities.FILE_OP_LOGGER.trace("Checking " + dir + " (root) for inputs");
- }
+ Utilities.FILE_OP_LOGGER.trace("Checking {} for inputs", dir);
+
// Ignore nullscan-optimized paths.
if (fs instanceof NullScanFileSystem) {
finalPaths.add(dir);
return;
}
- // Tez require the use of recursive input dirs for union processing, so we have to look into the
- // directory to find out
- LinkedList<Path> subdirs = new LinkedList<>();
- subdirs.add(dir); // add itself as a starting point
- while (!subdirs.isEmpty()) {
- Path currDir = subdirs.poll();
- FileStatus[] files = fs.listStatus(currDir);
- boolean hadAcidState = false; // whether getAcidState has been called for currDir
- for (FileStatus file : files) {
- Path path = file.getPath();
- if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
- Utilities.FILE_OP_LOGGER.trace("Checking " + path + " for inputs");
+ // We need to iterate to detect original directories, which are supported in MM but not ACID.
+ boolean hasOriginalFiles = false, hasAcidDirs = false;
+ List<Path> originalDirectories = new ArrayList<>();
+ for (FileStatus file : fs.listStatus(dir, AcidUtils.hiddenFileFilter)) {
+ Path currDir = file.getPath();
+ Utilities.FILE_OP_LOGGER.trace("Checking {} for being an input", currDir);
+ if (!file.isDirectory()) {
+ hasOriginalFiles = true;
+ } else if (AcidUtils.extractWriteId(currDir) == null) {
+ if (allowOriginals) {
+ originalDirectories.add(currDir); // Add as is; it would become a recursive split.
+ } else {
+ Utilities.FILE_OP_LOGGER.debug("Ignoring unknown (original?) directory {}", currDir);
}
- if (!file.isDirectory()) {
- Utilities.FILE_OP_LOGGER.warn("Ignoring a file not in MM directory " + path);
- } else if (AcidUtils.extractWriteId(path) == null) {
- subdirs.add(path);
- } else if (!hadAcidState) {
- AcidUtils.Directory dirInfo
- = AcidUtils.getAcidState(currDir, conf, validWriteIdList, Ref.from(false), true, null);
- hadAcidState = true;
-
- // Find the base, created for IOW.
- Path base = dirInfo.getBaseDirectory();
- if (base != null) {
- finalPaths.add(base);
- }
+ } else {
+ hasAcidDirs = true;
+ }
+ }
+ if (hasAcidDirs) {
+ AcidUtils.Directory dirInfo = AcidUtils.getAcidState(
+ dir, conf, validWriteIdList, Ref.from(false), true, null);
- // Find the parsed delta files.
- for (AcidUtils.ParsedDelta delta : dirInfo.getCurrentDirectories()) {
- Utilities.FILE_OP_LOGGER.debug("Adding input " + delta.getPath());
- finalPaths.add(delta.getPath());
- }
- }
+ // Find the base, created for IOW.
+ Path base = dirInfo.getBaseDirectory();
+ if (base != null) {
+ Utilities.FILE_OP_LOGGER.debug("Adding input {}", base);
+ finalPaths.add(base);
+ // Base means originals no longer matter.
+ originalDirectories.clear();
+ hasOriginalFiles = false;
+ }
+
+ // Find the parsed delta files.
+ for (AcidUtils.ParsedDelta delta : dirInfo.getCurrentDirectories()) {
+ Utilities.FILE_OP_LOGGER.debug("Adding input {}", delta.getPath());
+ finalPaths.add(delta.getPath());
+ }
+ }
+ if (!originalDirectories.isEmpty()) {
+ Utilities.FILE_OP_LOGGER.debug("Adding original directories {}", originalDirectories);
+ finalPaths.addAll(originalDirectories);
+ }
+ if (hasOriginalFiles) {
+ if (allowOriginals) {
+ Utilities.FILE_OP_LOGGER.debug("Directory has original files {}", dir);
+ pathsWithFileOriginals.add(dir);
+ } else {
+ Utilities.FILE_OP_LOGGER.debug("Ignoring unknown (original?) files in {}", dir);
}
}
}
+
Path[] getInputPaths(JobConf job) throws IOException {
Path[] dirs;
@@ -719,7 +758,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer);
}
- if (dirs.length != 0) {
+ if (dirs.length != 0) { // TODO: should this be currentDirs?
if (LOG.isInfoEnabled()) {
LOG.info("Generating splits for dirs: {}", dirs);
}
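Net effect: split generation now runs in up to two passes per input directory set: a normal (recursive) pass over base/delta paths, and a non-recursive pass over directories that hold original files, built on the helper above. A sketch of the second pass, as both HiveInputFormat and FetchOperator now do (variable names from the surrounding code):

    // Non-recursive splits over directories holding MM "original" files.
    JobConf nonRecConf = HiveInputFormat.createConfForMmOriginalsSplit(conf, dirsWithFileOriginals);
    InputSplit[] iss = inputFormat.getSplits(nonRecConf, splits);
    for (InputSplit is : iss) {
      result.add(new HiveInputSplit(is, inputFormatClass.getName()));
    }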
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 2337a35..049dbd3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -106,6 +106,7 @@ import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
@@ -117,6 +118,7 @@ import org.apache.hive.common.util.Ref;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.OrcProto;
import org.apache.orc.OrcProto.Footer;
+import org.apache.orc.OrcProto.Type;
import org.apache.orc.OrcUtils;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;
@@ -342,7 +344,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
return false;
}
-
+
public static boolean[] genIncludedColumns(TypeDescription readerSchema,
List<Integer> included) {
return genIncludedColumns(readerSchema, included, null);
@@ -701,15 +703,15 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// & therefore we should be able to retrieve them here and determine appropriate behavior.
// Note that this will be meaningless for non-acid tables & will be set to null.
//this is set by Utilities.copyTablePropertiesToConf()
- boolean isTableTransactional = conf.getBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, false);
- String transactionalProperties = conf.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
- this.acidOperationalProperties = isTableTransactional ?
- AcidOperationalProperties.parseString(transactionalProperties) : null;
+ boolean isTxnTable = conf.getBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, false);
+ String txnProperties = conf.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
+ this.acidOperationalProperties = isTxnTable
+ ? AcidOperationalProperties.parseString(txnProperties) : null;
String value = conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
writeIdList = value == null ? new ValidReaderWriteIdList() : new ValidReaderWriteIdList(value);
LOG.debug("Context:: Read ValidWriteIdList: " + writeIdList.toString()
- + " isTransactionalTable: " + isTableTransactional);
+ + " isTransactionalTable: " + isTxnTable + " properties: " + txnProperties);
}
@VisibleForTesting
@@ -1144,6 +1146,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
private final Ref<Boolean> useFileIds;
private final UserGroupInformation ugi;
+ @VisibleForTesting
FileGenerator(Context context, FileSystem fs, Path dir, boolean useFileIds,
UserGroupInformation ugi) {
this(context, fs, dir, Ref.from(useFileIds), ugi);
@@ -1176,13 +1179,31 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
private AcidDirInfo callInternal() throws IOException {
+ if (context.acidOperationalProperties != null
+ && context.acidOperationalProperties.isInsertOnly()) {
+ // See the class comment - HIF handles MM for all input formats, so if we try to handle it
+ // again, in particular for the non-recursive originals-only getSplits call, we will just
+ // get confused. This bypass was not necessary when MM tables didn't support originals. Now
+ // that they do, we use this path for anything MM-table-related, although everything except
+ // the originals could still be handled by AcidUtils like a regular non-txn table.
+ boolean isRecursive = context.conf.getBoolean(FileInputFormat.INPUT_DIR_RECURSIVE,
+ context.conf.getBoolean("mapred.input.dir.recursive", false));
+ List<HdfsFileStatusWithId> originals = new ArrayList<>();
+ List<AcidBaseFileInfo> baseFiles = new ArrayList<>();
+ AcidUtils.findOriginals(fs, fs.getFileStatus(dir), originals, useFileIds, true, isRecursive);
+ for (HdfsFileStatusWithId fileId : originals) {
+ baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.ORIGINAL_BASE));
+ }
+ return new AcidDirInfo(fs, dir, new AcidUtils.DirectoryImpl(Lists.newArrayList(), true, originals,
+ Lists.newArrayList(), Lists.newArrayList(), null), baseFiles, new ArrayList<>());
+ }
//todo: shouldn't ignoreEmptyFiles be set based on ExecutionEngine?
- AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir, context.conf,
- context.writeIdList, useFileIds, true, null);
+ AcidUtils.Directory dirInfo = AcidUtils.getAcidState(
+ dir, context.conf, context.writeIdList, useFileIds, true, null);
// find the base files (original or new style)
List<AcidBaseFileInfo> baseFiles = new ArrayList<>();
if (dirInfo.getBaseDirectory() == null) {
- //for non-acid tables, all data files are in getOriginalFiles() list
+ // For non-acid tables (or paths), all data files are in the getOriginalFiles() list
for (HdfsFileStatusWithId fileId : dirInfo.getOriginalFiles()) {
baseFiles.add(new AcidBaseFileInfo(fileId, AcidUtils.AcidBaseFileType.ORIGINAL_BASE));
}
@@ -1197,7 +1218,6 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
// Find the parsed deltas- some of them containing only the insert delta events
// may get treated as base if split-update is enabled for ACID. (See HIVE-14035 for details)
List<ParsedDelta> parsedDeltas = new ArrayList<>();
-
if (context.acidOperationalProperties != null &&
context.acidOperationalProperties.isSplitUpdate()) {
// If we have split-update turned on for this table, then the delta events have already been
@@ -1258,7 +1278,8 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
We already handled all delete deltas above and there should not be any other deltas for
any table type. (this was acid 1.0 code path).
*/
- assert dirInfo.getCurrentDirectories().isEmpty() : "Non empty curDir list?!: " + dir;
+ assert dirInfo.getCurrentDirectories().isEmpty() :
+ "Non empty curDir list?!: " + dirInfo.getCurrentDirectories();
// When split-update is not enabled, then all the deltas in the current directories
// should be considered as usual.
parsedDeltas.addAll(dirInfo.getCurrentDirectories());
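
A note on the insert-only bypass above: for MM tables the FileGenerator never consults getAcidState; it walks the directory (recursively, if the job asks for it) and reports every file as ORIGINAL_BASE data with no deltas. A hedged sketch of that shape, using simplified stand-in types rather than the real AcidBaseFileInfo/HdfsFileStatusWithId:

  import java.util.ArrayList;
  import java.util.List;

  class MmBypassSketch {
    enum BaseType { ORIGINAL_BASE }

    static class BaseFileInfo {
      final String path;
      final BaseType type;
      BaseFileInfo(String path, BaseType type) { this.path = path; this.type = type; }
    }

    // originals: file paths found by a (possibly recursive) directory walk;
    // every pre-conversion file is reported as base data with no deltas.
    static List<BaseFileInfo> wrapOriginals(List<String> originals) {
      List<BaseFileInfo> baseFiles = new ArrayList<>();
      for (String p : originals) {
        baseFiles.add(new BaseFileInfo(p, BaseType.ORIGINAL_BASE));
      }
      return baseFiles;
    }
  }
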
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
index b3e76b6..085f4a1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/io/FileOperations.java
@@ -17,6 +17,16 @@
*/
package org.apache.hadoop.hive.ql.parse.repl.dump.io;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import javax.security.auth.login.LoginException;
+
+import com.google.common.collect.Lists;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -25,7 +35,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.ReplChangeManager;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.AcidUtils.ParsedDelta;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.LoadSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
@@ -35,14 +45,6 @@ import org.apache.hadoop.hive.ql.plan.ExportWork.MmContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import javax.security.auth.login.LoginException;
-
-import java.io.BufferedWriter;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.List;
-
//TODO: this object is created once to call one method and then immediately destroyed.
//So it's basically just a roundabout way to pass arguments to a static method. Simplify?
public class FileOperations {
@@ -101,45 +103,48 @@ public class FileOperations {
}
private void copyMmPath() throws LoginException, IOException {
- assert dataPathList.size() == 1;
ValidWriteIdList ids = AcidUtils.getTableValidWriteIdList(hiveConf, mmCtx.getFqTableName());
- Path fromPath = dataFileSystem.makeQualified(dataPathList.get(0));
- List<Path> validPaths = getMmValidPaths(ids, fromPath);
- String fromPathStr = fromPath.toString();
- if (!fromPathStr.endsWith(Path.SEPARATOR)) {
- fromPathStr += Path.SEPARATOR;
- }
- for (Path validPath : validPaths) {
- // Export valid directories with a modified name so they don't look like bases/deltas.
- // We could also dump the delta contents all together and rename the files if names collide.
- String mmChildPath = "export_old_" + validPath.toString().substring(fromPathStr.length());
- Path destPath = new Path(exportRootDataDir, mmChildPath);
- exportFileSystem.mkdirs(destPath);
- copyOneDataPath(validPath, destPath);
+ for (Path fromPath : dataPathList) {
+ fromPath = dataFileSystem.makeQualified(fromPath);
+ List<Path> validPaths = new ArrayList<>(), dirsWithOriginals = new ArrayList<>();
+ HiveInputFormat.processPathsForMmRead(Lists.newArrayList(fromPath),
+ hiveConf, ids, validPaths, dirsWithOriginals);
+ String fromPathStr = fromPath.toString();
+ if (!fromPathStr.endsWith(Path.SEPARATOR)) {
+ fromPathStr += Path.SEPARATOR;
+ }
+ for (Path validPath : validPaths) {
+ // Export valid directories with a modified name so they don't look like bases/deltas.
+ // We could also dump the delta contents all together and rename the files if names collide.
+ String mmChildPath = "export_old_" + validPath.toString().substring(fromPathStr.length());
+ Path destPath = new Path(exportRootDataDir, mmChildPath);
+ Utilities.FILE_OP_LOGGER.debug("Exporting {} to {}", validPath, destPath);
+ exportFileSystem.mkdirs(destPath);
+ copyOneDataPath(validPath, destPath);
+ }
+ for (Path dirWithOriginals : dirsWithOriginals) {
+ FileStatus[] files = dataFileSystem.listStatus(dirWithOriginals, AcidUtils.hiddenFileFilter);
+ List<Path> srcPaths = new ArrayList<>();
+ for (FileStatus fileStatus : files) {
+ if (fileStatus.isDirectory()) continue;
+ srcPaths.add(fileStatus.getPath());
+ }
+ Utilities.FILE_OP_LOGGER.debug("Exporting originals from {} to {}",
+ dirWithOriginals, exportRootDataDir);
+ new CopyUtils(distCpDoAsUser, hiveConf).doCopy(exportRootDataDir, srcPaths);
+ }
}
}
- private List<Path> getMmValidPaths(ValidWriteIdList ids, Path fromPath) throws IOException {
- Utilities.FILE_OP_LOGGER.trace("Looking for valid MM paths under {}", fromPath);
- AcidUtils.Directory acidState = AcidUtils.getAcidState(fromPath, hiveConf, ids);
- List<Path> validPaths = new ArrayList<>();
- Path base = acidState.getBaseDirectory();
- if (base != null) {
- validPaths.add(base);
- }
- for (ParsedDelta pd : acidState.getCurrentDirectories()) {
- validPaths.add(pd.getPath());
- }
- return validPaths;
- }
+
/**
* This needs the root data directory to which the data needs to be exported to.
* The data export here is a list of files either in table/partition that are written to the _files
- * in the exportRootDataDir provided. In case of MM/ACID tables, we expect this pathlist to be
- * already passed as valid paths by caller based on ValidWriteIdList. So, mmCtx is ignored here.
+ * in the exportRootDataDir provided.
*/
private void exportFilesAsList() throws SemanticException, IOException {
+ // This is only called for replication that handles MM tables; no need for mmCtx.
try (BufferedWriter writer = writer()) {
for (Path dataPath : dataPathList) {
writeFilesList(listFilesInDir(dataPath), writer, AcidUtils.getAcidSubDir(dataPath));
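
The renaming logic in copyMmPath() above is easy to miss: each valid base/delta directory is exported under a name prefixed with "export_old_", so the exported tree no longer parses as an ACID layout on import. A small sketch of that computation (hypothetical class name; the real code operates on org.apache.hadoop.fs.Path rather than strings):

  class ExportNameSketch {
    // Strips the table-root prefix from a valid base/delta path and prefixes
    // "export_old_" so the exported directory no longer looks like ACID data.
    static String mmChildPath(String fromPathStr, String validPath) {
      if (!fromPathStr.endsWith("/")) {
        fromPathStr += "/";
      }
      return "export_old_" + validPath.substring(fromPathStr.length());
    }
  }

For example, mmChildPath("/wh/t", "/wh/t/delta_0000001_0000001") yields "export_old_delta_0000001_0000001".
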
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index 80f77b9..e77fc3e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -186,7 +186,7 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
}
}
} else {
- Utilities.FILE_OP_LOGGER.info("Resolver returning movetask for " + dirPath);
+ Utilities.FILE_OP_LOGGER.info("Resolver returning movetask for " + dirPath, new Exception());
resTsks.add(mvTask);
}
} catch (IOException e) {
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index b698c84..982b180 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -337,9 +337,11 @@ public class CompactorMR {
}
int deltaCount = dir.getCurrentDirectories().size();
- if ((deltaCount + (dir.getBaseDirectory() == null ? 0 : 1)) <= 1) {
+ int origCount = dir.getOriginalFiles().size();
+ if ((deltaCount + (dir.getBaseDirectory() == null ? 0 : 1)) + origCount <= 1) {
LOG.debug("Not compacting " + sd.getLocation() + "; current base is "
- + dir.getBaseDirectory() + " and there are " + deltaCount + " deltas");
+ + dir.getBaseDirectory() + " and there are " + deltaCount + " deltas and "
+ + origCount + " originals");
return;
}
try {
@@ -355,7 +357,8 @@ public class CompactorMR {
// Note: we could skip creating the table and just add table type stuff directly to the
// "insert overwrite directory" command if there were no bucketing or list bucketing.
- String tmpPrefix = t.getDbName() + ".tmp_compactor_" + t.getTableName() + "_", tmpTableName;
+ String tmpPrefix = t.getDbName() + ".tmp_compactor_" + t.getTableName() + "_";
+ String tmpTableName = null;
while (true) {
tmpTableName = tmpPrefix + System.currentTimeMillis();
String query = buildMmCompactionCtQuery(tmpTableName, t,
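
The changed guard above now counts original files alongside the base and the deltas when deciding whether an MM compaction is worthwhile. As a sketch (hypothetical method name, same arithmetic as the patch):

  class CompactionTriggerSketch {
    // Originals now count toward the total: a directory whose base, deltas
    // and originals add up to at most one item has nothing to merge.
    static boolean shouldCompact(int deltaCount, boolean hasBase, int origCount) {
      return deltaCount + (hasBase ? 1 : 0) + origCount > 1;
    }
  }

For instance, shouldCompact(0, true, 2) is true (a base plus two originals can be merged), while shouldCompact(1, false, 0) is false (a single delta alone is left as-is).
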
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index 3e2784b..a4d34a7 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -198,7 +198,7 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
String.format("select a,b from %s order by a,b", importName));
Assert.assertEquals("After import: " + rs, allData, rs);
runStatementOnDriver("drop table if exists " + importName);
-
+
// Do insert overwrite to create some invalid deltas, and import into a non-MM table.
int[][] rows2 = {{5,6},{7,8}};
runStatementOnDriver(String.format("insert overwrite table %s %s",
@@ -259,7 +259,7 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
return paths;
}
-
+
/**
* add tests for all transitions - AC=t, AC=t, AC=f, commit (for example)
* @throws Exception
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
index 8a55d8c..7319ba0 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java
@@ -35,6 +35,7 @@ import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.ArrayList;
@@ -43,6 +44,7 @@ import java.util.List;
import java.util.Set;
public abstract class TxnCommandsBaseForTests {
+ private static final Logger LOG = LoggerFactory.getLogger(TxnCommandsBaseForTests.class);
//bucket count for test tables; set it to 1 for easier debugging
final static int BUCKET_COUNT = 2;
@Rule
@@ -95,6 +97,7 @@ public abstract class TxnCommandsBaseForTests {
"org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true);
hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
+ hiveConf.setBoolean("mapred.input.dir.recursive", true);
TxnDbUtil.setConfValues(hiveConf);
TxnDbUtil.prepDb(hiveConf);
File f = new File(getWarehouseDir());
@@ -159,6 +162,7 @@ public abstract class TxnCommandsBaseForTests {
}
List<String> runStatementOnDriver(String stmt) throws Exception {
+ LOG.info("Running the query: " + stmt);
CommandProcessorResponse cpr = d.run(stmt);
if(cpr.getResponseCode() != 0) {
throw new RuntimeException(stmt + " failed: " + cpr);
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/test/queries/clientpositive/mm_conversions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_conversions.q b/ql/src/test/queries/clientpositive/mm_conversions.q
index 55565a9..e933582 100644
--- a/ql/src/test/queries/clientpositive/mm_conversions.q
+++ b/ql/src/test/queries/clientpositive/mm_conversions.q
@@ -15,21 +15,42 @@ insert into table intermediate partition(p='455') select distinct key from src w
insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 1;
insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 1;
+set hive.mm.allow.originals=true;
+set hive.exim.test.mode=true;
drop table simple_to_mm;
-create table simple_to_mm(key int) stored as orc;
+create table simple_to_mm(key int) stored as orc tblproperties("transactional"="false");
insert into table simple_to_mm select key from intermediate;
select * from simple_to_mm s1 order by key;
alter table simple_to_mm set tblproperties("transactional"="true", "transactional_properties"="insert_only");
+export table simple_to_mm to 'ql/test/data/exports/export0';
select * from simple_to_mm s2 order by key;
+create table import_converted0_mm(key int) stored as orc tblproperties("transactional"="false");
+import table import_converted0_mm from 'ql/test/data/exports/export0';
+select * from import_converted0_mm order by key;
+drop table import_converted0_mm;
+
insert into table simple_to_mm select key from intermediate;
insert into table simple_to_mm select key from intermediate;
+export table simple_to_mm to 'ql/test/data/exports/export1';
select * from simple_to_mm s3 order by key;
+create table import_converted1_mm(key int) stored as orc tblproperties("transactional"="false");
+import table import_converted1_mm from 'ql/test/data/exports/export1';
+select * from import_converted1_mm order by key;
+drop table import_converted1_mm;
+
+insert overwrite table simple_to_mm select key from intermediate;
+export table simple_to_mm to 'ql/test/data/exports/export2';
+select * from simple_to_mm s4 order by key;
+create table import_converted2_mm(key int) stored as orc tblproperties("transactional"="false");
+import table import_converted2_mm from 'ql/test/data/exports/export2';
+select * from import_converted2_mm order by key;
+drop table import_converted2_mm;
drop table simple_to_mm;
drop table part_to_mm;
-create table part_to_mm(key int) partitioned by (key_mm int) stored as orc;
+create table part_to_mm(key int) partitioned by (key_mm int) stored as orc tblproperties("transactional"="false");
insert into table part_to_mm partition(key_mm='455') select key from intermediate;
insert into table part_to_mm partition(key_mm='456') select key from intermediate;
select * from part_to_mm s1 order by key, key_mm;
@@ -40,4 +61,19 @@ insert into table part_to_mm partition(key_mm='457') select key from intermediat
select * from part_to_mm s3 order by key, key_mm;
drop table part_to_mm;
+set hive.mm.allow.originals=false;
+
+drop table simple_to_mm_text;
+create table simple_to_mm_text(key int) stored as textfile tblproperties("transactional"="false");
+insert into table simple_to_mm_text select key from intermediate;
+select * from simple_to_mm_text t1 order by key;
+alter table simple_to_mm_text set tblproperties("transactional"="true", "transactional_properties"="insert_only");
+select * from simple_to_mm_text t2 order by key;
+insert into table simple_to_mm_text select key from intermediate;
+insert into table simple_to_mm_text select key from intermediate;
+select * from simple_to_mm_text t3 order by key;
+insert overwrite table simple_to_mm_text select key from intermediate;
+select * from simple_to_mm_text t4 order by key;
+drop table simple_to_mm_text;
+
drop table intermediate;
http://git-wip-us.apache.org/repos/asf/hive/blob/f4352e53/ql/src/test/results/clientpositive/llap/mm_conversions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mm_conversions.q.out b/ql/src/test/results/clientpositive/llap/mm_conversions.q.out
index 4754710..8a9036a 100644
--- a/ql/src/test/results/clientpositive/llap/mm_conversions.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_conversions.q.out
@@ -41,11 +41,11 @@ PREHOOK: query: drop table simple_to_mm
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table simple_to_mm
POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table simple_to_mm(key int) stored as orc
+PREHOOK: query: create table simple_to_mm(key int) stored as orc tblproperties("transactional"="false")
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@simple_to_mm
-POSTHOOK: query: create table simple_to_mm(key int) stored as orc
+POSTHOOK: query: create table simple_to_mm(key int) stored as orc tblproperties("transactional"="false")
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@simple_to_mm
@@ -83,6 +83,14 @@ POSTHOOK: query: alter table simple_to_mm set tblproperties("transactional"="tru
POSTHOOK: type: ALTERTABLE_PROPERTIES
POSTHOOK: Input: default@simple_to_mm
POSTHOOK: Output: default@simple_to_mm
+PREHOOK: query: export table simple_to_mm to 'ql/test/data/exports/export0'
+PREHOOK: type: EXPORT
+PREHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
+POSTHOOK: query: export table simple_to_mm to 'ql/test/data/exports/export0'
+POSTHOOK: type: EXPORT
+POSTHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
PREHOOK: query: select * from simple_to_mm s2 order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@simple_to_mm
@@ -94,6 +102,41 @@ POSTHOOK: Input: default@simple_to_mm
0
98
100
+PREHOOK: query: create table import_converted0_mm(key int) stored as orc tblproperties("transactional"="false")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@import_converted0_mm
+POSTHOOK: query: create table import_converted0_mm(key int) stored as orc tblproperties("transactional"="false")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@import_converted0_mm
+PREHOOK: query: import table import_converted0_mm from 'ql/test/data/exports/export0'
+PREHOOK: type: IMPORT
+#### A masked pattern was here ####
+PREHOOK: Output: default@import_converted0_mm
+POSTHOOK: query: import table import_converted0_mm from 'ql/test/data/exports/export0'
+POSTHOOK: type: IMPORT
+#### A masked pattern was here ####
+POSTHOOK: Output: default@import_converted0_mm
+PREHOOK: query: select * from import_converted0_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@import_converted0_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from import_converted0_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@import_converted0_mm
+#### A masked pattern was here ####
+0
+98
+100
+PREHOOK: query: drop table import_converted0_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@import_converted0_mm
+PREHOOK: Output: default@import_converted0_mm
+POSTHOOK: query: drop table import_converted0_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@import_converted0_mm
+POSTHOOK: Output: default@import_converted0_mm
PREHOOK: query: insert into table simple_to_mm select key from intermediate
PREHOOK: type: QUERY
PREHOOK: Input: default@intermediate
@@ -124,6 +167,14 @@ POSTHOOK: Input: default@intermediate@p=456
POSTHOOK: Input: default@intermediate@p=457
POSTHOOK: Output: default@simple_to_mm
POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: export table simple_to_mm to 'ql/test/data/exports/export1'
+PREHOOK: type: EXPORT
+PREHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
+POSTHOOK: query: export table simple_to_mm to 'ql/test/data/exports/export1'
+POSTHOOK: type: EXPORT
+POSTHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
PREHOOK: query: select * from simple_to_mm s3 order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@simple_to_mm
@@ -141,6 +192,116 @@ POSTHOOK: Input: default@simple_to_mm
100
100
100
+PREHOOK: query: create table import_converted1_mm(key int) stored as orc tblproperties("transactional"="false")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@import_converted1_mm
+POSTHOOK: query: create table import_converted1_mm(key int) stored as orc tblproperties("transactional"="false")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@import_converted1_mm
+PREHOOK: query: import table import_converted1_mm from 'ql/test/data/exports/export1'
+PREHOOK: type: IMPORT
+#### A masked pattern was here ####
+PREHOOK: Output: default@import_converted1_mm
+POSTHOOK: query: import table import_converted1_mm from 'ql/test/data/exports/export1'
+POSTHOOK: type: IMPORT
+#### A masked pattern was here ####
+POSTHOOK: Output: default@import_converted1_mm
+PREHOOK: query: select * from import_converted1_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@import_converted1_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from import_converted1_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@import_converted1_mm
+#### A masked pattern was here ####
+0
+0
+0
+98
+98
+98
+100
+100
+100
+PREHOOK: query: drop table import_converted1_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@import_converted1_mm
+PREHOOK: Output: default@import_converted1_mm
+POSTHOOK: query: drop table import_converted1_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@import_converted1_mm
+POSTHOOK: Output: default@import_converted1_mm
+PREHOOK: query: insert overwrite table simple_to_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_to_mm
+POSTHOOK: query: insert overwrite table simple_to_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_to_mm
+POSTHOOK: Lineage: simple_to_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: export table simple_to_mm to 'ql/test/data/exports/export2'
+PREHOOK: type: EXPORT
+PREHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
+POSTHOOK: query: export table simple_to_mm to 'ql/test/data/exports/export2'
+POSTHOOK: type: EXPORT
+POSTHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
+PREHOOK: query: select * from simple_to_mm s4 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_to_mm s4 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_to_mm
+#### A masked pattern was here ####
+0
+98
+100
+PREHOOK: query: create table import_converted2_mm(key int) stored as orc tblproperties("transactional"="false")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@import_converted2_mm
+POSTHOOK: query: create table import_converted2_mm(key int) stored as orc tblproperties("transactional"="false")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@import_converted2_mm
+PREHOOK: query: import table import_converted2_mm from 'ql/test/data/exports/export2'
+PREHOOK: type: IMPORT
+#### A masked pattern was here ####
+PREHOOK: Output: default@import_converted2_mm
+POSTHOOK: query: import table import_converted2_mm from 'ql/test/data/exports/export2'
+POSTHOOK: type: IMPORT
+#### A masked pattern was here ####
+POSTHOOK: Output: default@import_converted2_mm
+PREHOOK: query: select * from import_converted2_mm order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@import_converted2_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from import_converted2_mm order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@import_converted2_mm
+#### A masked pattern was here ####
+0
+98
+100
+PREHOOK: query: drop table import_converted2_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@import_converted2_mm
+PREHOOK: Output: default@import_converted2_mm
+POSTHOOK: query: drop table import_converted2_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@import_converted2_mm
+POSTHOOK: Output: default@import_converted2_mm
PREHOOK: query: drop table simple_to_mm
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@simple_to_mm
@@ -153,11 +314,11 @@ PREHOOK: query: drop table part_to_mm
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table part_to_mm
POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc
+PREHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc tblproperties("transactional"="false")
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@part_to_mm
-POSTHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc
+POSTHOOK: query: create table part_to_mm(key int) partitioned by (key_mm int) stored as orc tblproperties("transactional"="false")
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@part_to_mm
@@ -299,6 +460,144 @@ POSTHOOK: query: drop table part_to_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@part_to_mm
POSTHOOK: Output: default@part_to_mm
+PREHOOK: query: drop table simple_to_mm_text
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table simple_to_mm_text
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table simple_to_mm_text(key int) stored as textfile tblproperties("transactional"="false")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: create table simple_to_mm_text(key int) stored as textfile tblproperties("transactional"="false")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@simple_to_mm_text
+PREHOOK: query: insert into table simple_to_mm_text select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: insert into table simple_to_mm_text select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_to_mm_text
+POSTHOOK: Lineage: simple_to_mm_text.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from simple_to_mm_text t1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_to_mm_text t1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+0
+98
+100
+PREHOOK: query: alter table simple_to_mm_text set tblproperties("transactional"="true", "transactional_properties"="insert_only")
+PREHOOK: type: ALTERTABLE_PROPERTIES
+PREHOOK: Input: default@simple_to_mm_text
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: alter table simple_to_mm_text set tblproperties("transactional"="true", "transactional_properties"="insert_only")
+POSTHOOK: type: ALTERTABLE_PROPERTIES
+POSTHOOK: Input: default@simple_to_mm_text
+POSTHOOK: Output: default@simple_to_mm_text
+PREHOOK: query: select * from simple_to_mm_text t2 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_to_mm_text t2 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+0
+98
+100
+PREHOOK: query: insert into table simple_to_mm_text select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: insert into table simple_to_mm_text select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_to_mm_text
+POSTHOOK: Lineage: simple_to_mm_text.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: insert into table simple_to_mm_text select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: insert into table simple_to_mm_text select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_to_mm_text
+POSTHOOK: Lineage: simple_to_mm_text.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from simple_to_mm_text t3 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_to_mm_text t3 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+0
+0
+0
+98
+98
+98
+100
+100
+100
+PREHOOK: query: insert overwrite table simple_to_mm_text select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: insert overwrite table simple_to_mm_text select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@simple_to_mm_text
+POSTHOOK: Lineage: simple_to_mm_text.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from simple_to_mm_text t4 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+POSTHOOK: query: select * from simple_to_mm_text t4 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@simple_to_mm_text
+#### A masked pattern was here ####
+0
+98
+100
+PREHOOK: query: drop table simple_to_mm_text
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@simple_to_mm_text
+PREHOOK: Output: default@simple_to_mm_text
+POSTHOOK: query: drop table simple_to_mm_text
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@simple_to_mm_text
+POSTHOOK: Output: default@simple_to_mm_text
PREHOOK: query: drop table intermediate
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@intermediate
[5/5] hive git commit: HIVE-19639 : a transactional Hive table cannot be imported as an external table (Sergey Shelukhin, reviewed by Thejas M Nair)
Posted by se...@apache.org.
HIVE-19639 : a transactional Hive table cannot be imported as an external table (Sergey Shelukhin, reviewed by Thejas M Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ffb7e043
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ffb7e043
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ffb7e043
Branch: refs/heads/master
Commit: ffb7e043ee15f90faf5271f9117ceaf688967bc5
Parents: 5df1eb3
Author: sergey <se...@apache.org>
Authored: Tue May 22 12:08:50 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue May 22 12:14:12 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 5 ++
.../hive/ql/parse/ImportSemanticAnalyzer.java | 31 +++++----
ql/src/test/queries/clientpositive/mm_exim.q | 8 +++
.../results/clientpositive/llap/mm_exim.q.out | 70 ++++++++++++++++++++
4 files changed, 102 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb7e043/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 10f7bd2..d84d0ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -1962,4 +1962,9 @@ public class AcidUtils {
}
return writeId;
}
+
+ public static void setNonTransactional(Map<String, String> tblProps) {
+ tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false");
+ tblProps.remove(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
+ }
}
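
A hedged usage sketch of the new helper above; the constant values "transactional" and "transactional_properties" are assumed here from hive_metastoreConstants:

  import java.util.HashMap;
  import java.util.Map;

  class NonTxnImportSketch {
    static final String TABLE_IS_TRANSACTIONAL = "transactional";                    // assumed value
    static final String TABLE_TRANSACTIONAL_PROPERTIES = "transactional_properties"; // assumed value

    // Mirrors AcidUtils.setNonTransactional: force the flag off and drop
    // any insert_only/default marker carried over from the export.
    static void setNonTransactional(Map<String, String> tblProps) {
      tblProps.put(TABLE_IS_TRANSACTIONAL, "false");
      tblProps.remove(TABLE_TRANSACTIONAL_PROPERTIES);
    }

    public static void main(String[] args) {
      Map<String, String> props = new HashMap<>();
      props.put(TABLE_IS_TRANSACTIONAL, "true");
      props.put(TABLE_TRANSACTIONAL_PROPERTIES, "insert_only");
      setNonTransactional(props);
      System.out.println(props); // prints {transactional=false}
    }
  }
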
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb7e043/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
index 682b641..cc7f0d5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java
@@ -255,16 +255,13 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
}
if (isExternalSet) {
- if (AcidUtils.isInsertOnlyTable(tblDesc.getTblProps())) {
- throw new SemanticException("Cannot import an MM table as external");
- }
tblDesc.setExternal(isExternalSet);
// This condition-check could have been avoided, but to honour the old
// default of not calling if it wasn't set, we retain that behaviour.
// TODO:cleanup after verification that the outer if isn't really needed here
}
- if (isLocationSet){
+ if (isLocationSet) {
tblDesc.setLocation(parsedLocation);
x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
}
@@ -320,11 +317,16 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
boolean tableExists = false;
if (table != null) {
- checkTable(table, tblDesc,replicationSpec, x.getConf());
+ checkTable(table, tblDesc, replicationSpec, x.getConf());
x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
tableExists = true;
}
+ if (!tableExists && isExternalSet) {
+ // If the user is explicitly importing a new external table, clear txn flags from the spec.
+ AcidUtils.setNonTransactional(tblDesc.getTblProps());
+ }
+
Long writeId = 0L; // Initialize with 0 for non-ACID and non-MM tables.
int stmtId = 0;
if (!replicationSpec.isInReplicationScope()
@@ -855,7 +857,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
x.getLOG().debug("table " + tblDesc.getTableName() + " does not exist");
Task<?> t = createTableTask(tblDesc, x);
- table = createNewTableMetadataObject(tblDesc);
+ table = createNewTableMetadataObject(tblDesc, false);
Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
@@ -891,14 +893,19 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
}
}
- private static Table createNewTableMetadataObject(ImportTableDesc tblDesc)
+ private static Table createNewTableMetadataObject(ImportTableDesc tblDesc, boolean isRepl)
throws SemanticException {
Table newTable = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
//so that we know the type of table we are creating: acid/MM to match what was exported
newTable.setParameters(tblDesc.getTblProps());
if(tblDesc.isExternal() && AcidUtils.isTransactionalTable(newTable)) {
- throw new SemanticException("External tables may not be transactional: " +
- Warehouse.getQualifiedName(tblDesc.getDatabaseName(), tblDesc.getTableName()));
+ if (isRepl) {
+ throw new SemanticException("External tables may not be transactional: " +
+ Warehouse.getQualifiedName(tblDesc.getDatabaseName(), tblDesc.getTableName()));
+ } else {
+ throw new AssertionError("Internal error: transactional properties not set properly: "
+ + tblDesc.getTblProps());
+ }
}
return newTable;
}
@@ -992,7 +999,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
}
Task t = createTableTask(tblDesc, x);
- table = createNewTableMetadataObject(tblDesc);
+ table = createNewTableMetadataObject(tblDesc, true);
if (!replicationSpec.isMetadataOnly()) {
if (isPartitioned(tblDesc)) {
@@ -1025,7 +1032,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
// create the dummy table object for adding repl tasks.
boolean isOldTableValid = true;
if (table.isPartitioned() != isPartitioned(tblDesc)) {
- table = createNewTableMetadataObject(tblDesc);
+ table = createNewTableMetadataObject(tblDesc, true);
isOldTableValid = false;
}
@@ -1043,7 +1050,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer {
ptn = x.getHive().getPartition(table, partSpec, false);
} catch (HiveException ex) {
ptn = null;
- table = createNewTableMetadataObject(tblDesc);
+ table = createNewTableMetadataObject(tblDesc, true);
isOldTableValid = false;
}
}
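
To summarize the isRepl split above as a sketch (hypothetical exception types; the real code throws SemanticException for the replication case):

  class ExternalTxnCheckSketch {
    // isRepl: replication-scope import. There the combination is a user-facing
    // error; on a regular import the flags should already have been cleared by
    // setNonTransactional, so hitting this branch is an internal error.
    static void checkExternalTxn(boolean isExternal, boolean isTransactional, boolean isRepl) {
      if (isExternal && isTransactional) {
        if (isRepl) {
          throw new IllegalArgumentException("External tables may not be transactional");
        }
        throw new AssertionError("Internal error: transactional properties not set properly");
      }
    }
  }
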
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb7e043/ql/src/test/queries/clientpositive/mm_exim.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_exim.q b/ql/src/test/queries/clientpositive/mm_exim.q
index 9870bf4..13b3055 100644
--- a/ql/src/test/queries/clientpositive/mm_exim.q
+++ b/ql/src/test/queries/clientpositive/mm_exim.q
@@ -97,4 +97,12 @@ import table import7_mm from 'ql/test/data/exports/intermmediate_part';
select * from import7_mm order by key, p;
drop table import7_mm;
+-- import MM as external
+
+drop table import8_mm;
+import external table import8_mm from 'ql/test/data/exports/intermmediate_nonpart';
+desc formatted import8_mm;
+select * from import8_mm order by key, p;
+drop table import8_mm;
+
set hive.exim.test.mode=false;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/ffb7e043/ql/src/test/results/clientpositive/llap/mm_exim.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mm_exim.q.out b/ql/src/test/results/clientpositive/llap/mm_exim.q.out
index 001cd05..8dfc738 100644
--- a/ql/src/test/results/clientpositive/llap/mm_exim.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_exim.q.out
@@ -610,3 +610,73 @@ POSTHOOK: query: drop table import7_mm
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@import7_mm
POSTHOOK: Output: default@import7_mm
+PREHOOK: query: drop table import8_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table import8_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: import external table import8_mm from 'ql/test/data/exports/intermmediate_nonpart'
+PREHOOK: type: IMPORT
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+POSTHOOK: query: import external table import8_mm from 'ql/test/data/exports/intermmediate_nonpart'
+POSTHOOK: type: IMPORT
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@import8_mm
+PREHOOK: query: desc formatted import8_mm
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@import8_mm
+POSTHOOK: query: desc formatted import8_mm
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@import8_mm
+# col_name data_type comment
+key int
+p int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ EXTERNAL TRUE
+ bucketing_version 2
+ numFiles 3
+ numRows 6
+ rawDataSize 37
+ totalSize 43
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from import8_mm order by key, p
+PREHOOK: type: QUERY
+PREHOOK: Input: default@import8_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select * from import8_mm order by key, p
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@import8_mm
+#### A masked pattern was here ####
+0 456
+10 456
+97 455
+98 455
+100 457
+103 457
+PREHOOK: query: drop table import8_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@import8_mm
+PREHOOK: Output: default@import8_mm
+POSTHOOK: query: drop table import8_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@import8_mm
+POSTHOOK: Output: default@import8_mm
[3/5] hive git commit: HIVE-19579 : remove HBase transitive dependency that drags in some snapshot (Sergey Shelukhin, reviewed by Alan Gates)
Posted by se...@apache.org.
HIVE-19579 : remove HBase transitive dependency that drags in some snapshot (Sergey Shelukhin, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/37e86e95
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/37e86e95
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/37e86e95
Branch: refs/heads/master
Commit: 37e86e9581dfc02aea243eab05c8a15347f638d6
Parents: df12aec
Author: sergey <se...@apache.org>
Authored: Tue May 22 12:01:29 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue May 22 12:14:11 2018 -0700
----------------------------------------------------------------------
hbase-handler/pom.xml | 8 ++++++++
itests/hcatalog-unit/pom.xml | 6 ++++++
itests/hive-minikdc/pom.xml | 6 ++++++
itests/hive-unit-hadoop2/pom.xml | 6 ++++++
itests/hive-unit/pom.xml | 6 ++++++
itests/qtest-accumulo/pom.xml | 6 ++++++
itests/qtest-spark/pom.xml | 6 ++++++
itests/qtest/pom.xml | 6 ++++++
itests/util/pom.xml | 6 ++++++
llap-server/pom.xml | 10 ++++++++++
pom.xml | 6 ++++++
11 files changed, 72 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/hbase-handler/pom.xml
----------------------------------------------------------------------
diff --git a/hbase-handler/pom.xml b/hbase-handler/pom.xml
index 7ab5b61..cc272a7 100644
--- a/hbase-handler/pom.xml
+++ b/hbase-handler/pom.xml
@@ -90,6 +90,10 @@
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
@@ -155,6 +159,10 @@
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/hcatalog-unit/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hcatalog-unit/pom.xml b/itests/hcatalog-unit/pom.xml
index 628902b..91c2b90 100644
--- a/itests/hcatalog-unit/pom.xml
+++ b/itests/hcatalog-unit/pom.xml
@@ -280,6 +280,12 @@
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/hive-minikdc/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hive-minikdc/pom.xml b/itests/hive-minikdc/pom.xml
index 15b8dfa..38ffde5 100644
--- a/itests/hive-minikdc/pom.xml
+++ b/itests/hive-minikdc/pom.xml
@@ -217,6 +217,12 @@
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/hive-unit-hadoop2/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hive-unit-hadoop2/pom.xml b/itests/hive-unit-hadoop2/pom.xml
index 18f1037..1fab838 100644
--- a/itests/hive-unit-hadoop2/pom.xml
+++ b/itests/hive-unit-hadoop2/pom.xml
@@ -221,6 +221,12 @@
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/hive-unit/pom.xml
----------------------------------------------------------------------
diff --git a/itests/hive-unit/pom.xml b/itests/hive-unit/pom.xml
index 26e423c..96ae2bb 100644
--- a/itests/hive-unit/pom.xml
+++ b/itests/hive-unit/pom.xml
@@ -272,6 +272,12 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/qtest-accumulo/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest-accumulo/pom.xml b/itests/qtest-accumulo/pom.xml
index 30f2225..c872797 100644
--- a/itests/qtest-accumulo/pom.xml
+++ b/itests/qtest-accumulo/pom.xml
@@ -286,6 +286,12 @@
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.htrace</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/qtest-spark/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest-spark/pom.xml b/itests/qtest-spark/pom.xml
index c550446..7ecabef 100644
--- a/itests/qtest-spark/pom.xml
+++ b/itests/qtest-spark/pom.xml
@@ -318,6 +318,12 @@
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/qtest/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 8b84fa3..0c82c14 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -319,6 +319,12 @@
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/itests/util/pom.xml
----------------------------------------------------------------------
diff --git a/itests/util/pom.xml b/itests/util/pom.xml
index 5f91523..e31e013 100644
--- a/itests/util/pom.xml
+++ b/itests/util/pom.xml
@@ -164,6 +164,12 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/llap-server/pom.xml
----------------------------------------------------------------------
diff --git a/llap-server/pom.xml b/llap-server/pom.xml
index a57835e..f02f022 100644
--- a/llap-server/pom.xml
+++ b/llap-server/pom.xml
@@ -317,6 +317,12 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
@@ -331,6 +337,10 @@
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
</exclusions>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/hive/blob/37e86e95/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 9f64cbf..162cce4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -942,6 +942,12 @@
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.glassfish.web</groupId>
+ <artifactId>javax.servlet.jsp</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>