Posted to commits@hive.apache.org by ek...@apache.org on 2018/06/06 17:15:10 UTC

hive git commit: HIVE-19750: Initialize NEXT_WRITE_ID.NWI_NEXT on converting an existing table to full acid (Eugene Koifman, reviewed by Sankar Hariappan)

Repository: hive
Updated Branches:
  refs/heads/master 0992d8292 -> 13fbae573


HIVE-19750: Initialize NEXT_WRITE_ID.NWI_NEXT on converting an existing table to full acid (Eugene Koifman, reviewed by Sankar Hariappan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/13fbae57
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/13fbae57
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/13fbae57

Branch: refs/heads/master
Commit: 13fbae57321f3525cabb326df702430d61c242f9
Parents: 0992d82
Author: Eugene Koifman <ek...@apache.org>
Authored: Wed Jun 6 10:15:04 2018 -0700
Committer: Eugene Koifman <ek...@apache.org>
Committed: Wed Jun 6 10:15:04 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/TestTxnCommands.java  | 19 ++--
 .../apache/hadoop/hive/ql/TestTxnCommands2.java | 18 ++--
 .../apache/hadoop/hive/ql/TestTxnLoadData.java  | 39 ++++++---
 .../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 73 ++++++++++------
 .../llap/acid_vectorization_original.q.out      | 14 +--
 .../tez/acid_vectorization_original_tez.q.out   | 14 +--
 .../TransactionalValidationListener.java        | 11 +++
 .../api/InitializeTableWriteIdsRequest.java     | 42 +++++++++
 .../hadoop/hive/metastore/txn/TxnHandler.java   | 92 ++++++++++----------
 .../hadoop/hive/metastore/txn/TxnStore.java     |  6 ++
 10 files changed, 211 insertions(+), 117 deletions(-)
----------------------------------------------------------------------
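
The net effect of this change shows up throughout the test expectations below: once a pre-existing (non-acid) table is converted to full acid, NEXT_WRITE_ID.NWI_NEXT is seeded to 10,000,001, so the first post-conversion write gets write id 10000001 and its files land in delta_10000001_10000001_0000 (and, after major compaction, base_10000001) rather than delta_0000001_0000001_0000 / base_0000001. A condensed sketch of that scenario, in the style of the TxnCommandsBaseForTests-derived tests touched here (the table name and assertions are illustrative, not the committed test code):

  // Sketch only: assumes the runStatementOnDriver()/List<String> helpers that
  // TestTxnCommands and friends already provide; t_plain is a made-up table name.
  @Test
  public void convertThenWrite() throws Exception {
    runStatementOnDriver("create table t_plain(a int, b int) stored as orc " +
        "tblproperties('transactional'='false')");
    runStatementOnDriver("insert into t_plain values(1,2)");   // 'original' file, writeid 0

    // conversion triggers TransactionalValidationListener, which now seeds NEXT_WRITE_ID
    runStatementOnDriver("alter table t_plain set tblproperties('transactional'='true')");

    // the first post-conversion write is therefore assigned write id 10000001
    runStatementOnDriver("insert into t_plain values(3,4)");
    List<String> rs = runStatementOnDriver(
        "select ROW__ID, a, b, INPUT__FILE__NAME from t_plain order by ROW__ID");
    Assert.assertTrue(rs.get(rs.size() - 1),
        rs.get(rs.size() - 1).contains("delta_10000001_10000001_0000"));
  }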


http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index a4d34a7..cd4b670 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -897,12 +897,14 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
             rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12"));
     Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/000001_0_copy_1"));
     Assert.assertTrue(rs.get(3),
-            rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
-    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/delta_0000001_0000001_0000/bucket_00001"));
+            rs.get(3).startsWith("{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
+    Assert.assertTrue(rs.get(3), rs.get(3)
+        .endsWith("nonacidorctbl/delta_10000001_10000001_0000/bucket_00001"));
     //run Compaction
     runStatementOnDriver("alter table "+ TestTxnCommands2.Table.NONACIDORCTBL +" compact 'major'");
     TestTxnCommands2.runWorker(hiveConf);
-    rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.NONACIDORCTBL + " order by ROW__ID");
+    rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from " +
+        Table.NONACIDORCTBL + " order by ROW__ID");
     LOG.warn("after compact");
     for(String s : rs) {
       LOG.warn(s);
@@ -910,16 +912,17 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
     Assert.assertEquals("", 4, rs.size());
     Assert.assertTrue(rs.get(0),
             rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
-    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+    Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
     Assert.assertTrue(rs.get(1),
             rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
-    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+    Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
     Assert.assertTrue(rs.get(2),
             rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12"));
-    Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+    Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
     Assert.assertTrue(rs.get(3),
-            rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
-    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+            rs.get(3)
+                .startsWith("{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
+    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
 
     //make sure they are the same before and after compaction
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index a547a84..7c201b6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -379,14 +379,14 @@ public class TestTxnCommands2 {
      */
     String[][] expected = {
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t0\t13",  "bucket_00001"},
-        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"},
-        {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"},
-        {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"},
+        {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"},
+        {"{\"writeid\":10000003,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"},
+        {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2",   "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4",   "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5",   "bucket_00001"},
         {"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":6}\t1\t6",   "bucket_00001"},
-        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
+        {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
     };
     Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
     for(int i = 0; i < expected.length; i++) {
@@ -773,11 +773,11 @@ public class TestTxnCommands2 {
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
         Arrays.sort(buckets);
         if (numDelta == 1) {
-          Assert.assertEquals("delta_0000001_0000001_0000", status[i].getPath().getName());
+          Assert.assertEquals("delta_10000001_10000001_0000", status[i].getPath().getName());
           Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
           Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
         } else if (numDelta == 2) {
-          Assert.assertEquals("delta_0000002_0000002_0000", status[i].getPath().getName());
+          Assert.assertEquals("delta_10000002_10000002_0000", status[i].getPath().getName());
           Assert.assertEquals(1, buckets.length);
           Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
         }
@@ -786,7 +786,7 @@ public class TestTxnCommands2 {
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
         Arrays.sort(buckets);
         if (numDeleteDelta == 1) {
-          Assert.assertEquals("delete_delta_0000001_0000001_0000", status[i].getPath().getName());
+          Assert.assertEquals("delete_delta_10000001_10000001_0000", status[i].getPath().getName());
           Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
           Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
         }
@@ -833,7 +833,7 @@ public class TestTxnCommands2 {
           Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
         } else if (numBase == 2) {
           // The new base dir now has two bucket files, since the delta dir has two bucket files
-          Assert.assertEquals("base_0000002", status[i].getPath().getName());
+          Assert.assertEquals("base_10000002", status[i].getPath().getName());
           Assert.assertEquals(2, buckets.length);
           Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
         }
@@ -859,7 +859,7 @@ public class TestTxnCommands2 {
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
       (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER);
     Assert.assertEquals(1, status.length);
-    Assert.assertEquals("base_0000002", status[0].getPath().getName());
+    Assert.assertEquals("base_10000002", status[0].getPath().getName());
     FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
     Arrays.sort(buckets);
     Assert.assertEquals(2, buckets.length);

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
index 11c5930..fb88f25 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
@@ -263,12 +263,18 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests {
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/000000_0"},
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/000000_0"},
         //from Load Data into acid converted table
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"},
-        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t2", "t/delta_0000001_0000001_0000/000001_0"},
-        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3", "t/delta_0000001_0000001_0000/000001_0"},
-        {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t4\t4", "t/delta_0000001_0000001_0000/000002_0"},
-        {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t5\t5", "t/delta_0000001_0000001_0000/000002_0"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t2",
+            "t/delta_10000001_10000001_0000/000000_0"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":1}\t3\t4",
+            "t/delta_10000001_10000001_0000/000000_0"},
+        {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t2\t2",
+            "t/delta_10000001_10000001_0000/000001_0"},
+        {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t3\t3",
+            "t/delta_10000001_10000001_0000/000001_0"},
+        {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":0}\t4\t4",
+            "t/delta_10000001_10000001_0000/000002_0"},
+        {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":1}\t5\t5",
+            "t/delta_10000001_10000001_0000/000002_0"},
     };
     checkResult(expected, testQuery, isVectorized, "load data inpath");
 
@@ -279,9 +285,12 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests {
     runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T");
 
     String[][] expected2 = new String[][] {
-        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000002/000000_0"},
-        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000002/000000_0"},
-        {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000002/000001_0"}
+        {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t5\t6",
+            "t/base_10000002/000000_0"},
+        {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_10000002/000000_0"},
+        {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t8\t8",
+
+            "t/base_10000002/000001_0"}
     };
     checkResult(expected2, testQuery, isVectorized, "load data inpath overwrite");
 
@@ -291,10 +300,14 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests {
     TestTxnCommands2.runWorker(hiveConf);
 
     String[][] expected3 = new String[][] {
-        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000003/bucket_00000"},
-        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000003/bucket_00000"},
-        {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000003/bucket_00001"},
-        {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t9\t9", "t/base_0000003/bucket_00000"}
+        {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t5\t6",
+            "t/base_10000003/bucket_00000"},
+        {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":1}\t7\t8",
+            "t/base_10000003/bucket_00000"},
+        {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t8\t8",
+            "t/base_10000003/bucket_00001"},
+        {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t9\t9",
+            "t/base_10000003/bucket_00000"}
     };
     checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite (major)");
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index c15c5a6..f071531 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -375,7 +375,7 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"},
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_0000001_0000001_0000/bucket_00000"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_10000001_10000001_0000/bucket_00000"},
     };
     rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b, INPUT__FILE__NAME");
     checkExpected(rs, expected3,"after converting to acid (no compaction with updates)");
@@ -387,15 +387,24 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
 
     /*Compaction preserves location of rows wrt buckets/tranches (for now)*/
     String expected4[][] = {
-        {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/base_0000002/bucket_00002"},
-        {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/base_0000002/bucket_00002"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/base_0000002/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/base_0000002/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/base_0000002/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/base_0000002/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_0000002/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/base_0000002/bucket_00000"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/base_0000002/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2",
+            "warehouse/t/base_10000002/bucket_00002"},
+        {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4",
+            "warehouse/t/base_10000002/bucket_00002"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6",
+            "warehouse/t/base_10000002/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10",
+            "warehouse/t/base_10000002/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20",
+            "warehouse/t/base_10000002/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12",
+            "warehouse/t/base_10000002/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40",
+            "warehouse/t/base_10000002/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60",
+            "warehouse/t/base_10000002/bucket_00000"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t60\t88",
+            "warehouse/t/base_10000002/bucket_00000"},
     };
     checkExpected(rs, expected4,"after major compact");
   }
@@ -467,15 +476,24 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
      * Also check the file name (only) after compaction for completeness
      */
     String[][] expected = {
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13",  "bucket_00000", "000000_0_copy_1"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000", "bucket_00000"},
-        {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000", "bucket_00000"},
-        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000", "bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2",   "bucket_00000", "000000_0"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4",   "bucket_00000", "000000_0_copy_1"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5",   "bucket_00000", "000000_0_copy_1"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6",   "bucket_00000", "000000_0_copy_2"},
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t16", "bucket_00000", "bucket_00000"}
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13",
+            "bucket_00000", "000000_0_copy_1"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t0\t15",
+            "bucket_00000", "bucket_00000"},
+        {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t0\t17",
+            "bucket_00000", "bucket_00000"},
+        {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t0\t120",
+            "bucket_00000", "bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2",
+            "bucket_00000", "000000_0"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4",
+            "bucket_00000", "000000_0_copy_1"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5",
+            "bucket_00000", "000000_0_copy_1"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6",
+            "bucket_00000", "000000_0_copy_2"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":1}\t1\t16",
+            "bucket_00000", "bucket_00000"}
     };
     Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
     for(int i = 0; i < expected.length; i++) {
@@ -620,7 +638,7 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
     query = "select ROW__ID, b from T where b > 0 order by a";
     rs = runStatementOnDriver(query);
     String[][] expected4 = {
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}","17"},
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}","17"},
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}","4"},
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}","6"},
         {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}","8"},
@@ -641,11 +659,16 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
     query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
     rs = runStatementOnDriver(query);
     String[][] expected5 = {//the row__ids are the same after compaction
-        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4",   "warehouse/t/base_0000001/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6",   "warehouse/t/base_0000001/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8",   "warehouse/t/base_0000001/bucket_00000"},
-        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10",  "warehouse/t/base_0000001/bucket_00000"}
+        {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t17",
+            "warehouse/t/base_10000001/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4",
+            "warehouse/t/base_10000001/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6",
+            "warehouse/t/base_10000001/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8",
+            "warehouse/t/base_10000001/bucket_00000"},
+        {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10",
+            "warehouse/t/base_10000001/bucket_00000"}
     };
     checkExpected(rs, expected5, "After major compaction");
     //vectorized because there is INPUT__FILE__NAME

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index dafd5d9..957dfd8 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -665,22 +665,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over10k_orc_bucketed
-                  Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                     outputColumnNames: ROW__ID
-                    Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
-                        Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
@@ -692,13 +692,13 @@ STAGE PLANS:
                 keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (_col1 > 1L) (type: boolean)
-                  Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
index 01ec132..3c9cf03 100644
--- a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
+++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
@@ -680,22 +680,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over10k_orc_bucketed_n0
-                  Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                     outputColumnNames: ROW__ID
-                    Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
-                        Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
         Reducer 2 
             Reduce Operator Tree:
@@ -704,13 +704,13 @@ STAGE PLANS:
                 keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (_col1 > 1L) (type: boolean)
-                  Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
index c3d99c3..56da115 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.metastore.api.InitializeTableWriteIdsRequest;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.events.PreAlterTableEvent;
 import org.apache.hadoop.hive.metastore.events.PreCreateTableEvent;
 import org.apache.hadoop.hive.metastore.events.PreEventContext;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.slf4j.Logger;
@@ -190,6 +192,15 @@ public final class TransactionalValidationListener extends MetaStorePreEventList
       }
     }
     checkSorted(newTable);
+    if(TxnUtils.isAcidTable(newTable) && !TxnUtils.isAcidTable(oldTable)) {
+      /* we just made an existing table full acid which wasn't acid before and it passed all checks
+      initialize the Write ID sequence so that we can handle assigning ROW_IDs to 'original'
+      files already present in the table. */
+      TxnStore t = TxnUtils.getTxnStore(getConf());
+      //For now assume no partition may have > 10M files.  Perhaps better to count them.
+      t.seedWriteIdOnAcidConversion(new InitializeTableWriteIdsRequest(newTable.getDbName(),
+          newTable.getTableName(), 10000000));
+    }
   }
 
   private void checkSorted(Table newTable) throws MetaException {
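
The hard-coded 10,000,000 above encodes the assumption stated in the comment: no partition of the converted table holds more than 10M pre-existing files. The comment's "perhaps better to count them" alternative could look roughly like the sketch below; this is purely hypothetical (not part of this commit) and reuses the FileSystem/RemoteIterator/LocatedFileStatus types the listener already imports, plus java.io.IOException:

  // Hypothetical alternative to the fixed 10M seed: count the original files under
  // the table location and seed just past that count. Not part of this commit.
  private long countOriginalFiles(FileSystem fs, Path tableLocation) throws IOException {
    long count = 0;
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(tableLocation, true);
    while (files.hasNext()) {
      String name = files.next().getPath().getName();
      if (!name.startsWith("_") && !name.startsWith(".")) {   // skip hidden/tmp files
        count++;
      }
    }
    return count;
  }

  // caller side (still inside the listener), replacing the 10000000 constant:
  //   long seed = Math.max(2, countOriginalFiles(fs, tablePath) + 1);  // request asserts seed > 1
  //   t.seedWriteIdOnAcidConversion(new InitializeTableWriteIdsRequest(
  //       newTable.getDbName(), newTable.getTableName(), seed));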

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
new file mode 100644
index 0000000..d56b66a
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.api;
+
+public class InitializeTableWriteIdsRequest {
+  private final String dbName;
+  private final String tblName;
+  private final long seeWriteId;
+  public InitializeTableWriteIdsRequest(String dbName, String tblName, long seeWriteId) {
+    assert dbName != null;
+    assert tblName != null;
+    assert seeWriteId > 1;
+    this.dbName = dbName;
+    this.tblName = tblName;
+    this.seeWriteId = seeWriteId;
+  }
+  public String getDbName() {
+    return dbName;
+  }
+  public String getTblName() {
+    return tblName;
+  }
+
+  public long getSeeWriteId() {
+    return seeWriteId;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index d1b0d32..f25e77a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -67,54 +67,7 @@ import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockHandler;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.MetaStoreListenerNotifier;
 import org.apache.hadoop.hive.metastore.TransactionalMetaStoreEventListener;
-import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
-import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
-import org.apache.hadoop.hive.metastore.api.AddDynamicPartitions;
-import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest;
-import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsResponse;
-import org.apache.hadoop.hive.metastore.api.BasicTxnInfo;
-import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
-import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionResponse;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
-import org.apache.hadoop.hive.metastore.api.DataOperationType;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest;
-import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsResponse;
-import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
-import org.apache.hadoop.hive.metastore.api.HiveObjectType;
-import org.apache.hadoop.hive.metastore.api.LockComponent;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
-import org.apache.hadoop.hive.metastore.api.LockState;
-import org.apache.hadoop.hive.metastore.api.LockType;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.ReplTblWriteIdStateRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
-import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
-import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnInfo;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
-import org.apache.hadoop.hive.metastore.api.TxnState;
-import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
-import org.apache.hadoop.hive.metastore.api.UnlockRequest;
+import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
 import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider;
@@ -1537,7 +1490,50 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI {
       return allocateTableWriteIds(rqst);
     }
   }
+  @Override
+  public void seedWriteIdOnAcidConversion(InitializeTableWriteIdsRequest rqst)
+      throws MetaException {
+    try {
+      Connection dbConn = null;
+      Statement stmt = null;
+      TxnStore.MutexAPI.LockHandle handle = null;
+      try {
+        lockInternal();
+        dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
+        stmt = dbConn.createStatement();
+
+        handle = getMutexAPI().acquireLock(MUTEX_KEY.WriteIdAllocator.name());
+        //since this is on conversion from non-acid to acid, NEXT_WRITE_ID should not have an entry
+        //for this table.  It also has a unique index in case 'should not' is violated
+
+        // First allocation of write id should add the table to the next_write_id meta table
+        // The initial value for write id should be 1 and hence we add 1 with number of write ids
+        // allocated here
+        String s = "insert into NEXT_WRITE_ID (nwi_database, nwi_table, nwi_next) values ("
+            + quoteString(rqst.getDbName()) + "," + quoteString(rqst.getTblName()) + "," +
+            Long.toString(rqst.getSeeWriteId() + 1) + ")";
+        LOG.debug("Going to execute insert <" + s + ">");
+        stmt.execute(s);
+        LOG.debug("Going to commit");
+        dbConn.commit();
+      } catch (SQLException e) {
+        LOG.debug("Going to rollback");
+        rollbackDBConn(dbConn);
+        checkRetryable(dbConn, e, "seedWriteIdOnAcidConversion(" + rqst + ")");
+        throw new MetaException("Unable to update transaction database "
+            + StringUtils.stringifyException(e));
+      } finally {
+        close(null, stmt, dbConn);
+        if(handle != null) {
+          handle.releaseLocks();
+        }
+        unlockInternal();
+      }
+    } catch (RetryException e) {
+      seedWriteIdOnAcidConversion(rqst);
+    }
 
+  }
   @Override
   @RetrySemantics.SafeToRetry
   public void performWriteSetGC() {
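
With the listener passing 10,000,000, the row inserted above stores NWI_NEXT = 10,000,001, and the test expectations earlier in this commit show that this becomes the first write id subsequently allocated for the table; the delta/base directory names follow directly from it. A tiny stand-alone illustration of that mapping (the zero-padding mirrors the paths in the expected test output; it is not a call into Hive's own path-building code):

  // Illustration only: how the seeded NWI_NEXT value maps to the directory names in the tests.
  public class WriteIdSeedIllustration {
    public static void main(String[] args) {
      long seed = 10_000_000L;      // value passed in by TransactionalValidationListener
      long nwiNext = seed + 1;      // what the insert above stores in NEXT_WRITE_ID.NWI_NEXT
      long firstWriteId = nwiNext;  // first write id handed out after conversion (per the tests)

      // delta_<writeId>_<writeId>_<stmtId> and base_<writeId>, padded as in the test paths
      System.out.println(String.format("delta_%07d_%07d_%04d", firstWriteId, firstWriteId, 0));
      // -> delta_10000001_10000001_0000
      System.out.println(String.format("base_%07d", firstWriteId));
      // -> base_10000001
    }
  }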

http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
index 4695f0d..ef447e1 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
@@ -154,6 +154,12 @@ public interface TxnStore extends Configurable {
     throws NoSuchTxnException, TxnAbortedException, MetaException;
 
   /**
+   * Called on conversion of existing table to full acid.  Sets initial write ID to a high
+   * enough value so that we can assign unique ROW__IDs to data in existing files.
+   */
+  void seedWriteIdOnAcidConversion(InitializeTableWriteIdsRequest rqst) throws MetaException;
+
+  /**
    * Obtain a lock.
    * @param rqst information on the lock to obtain.  If the requester is part of a transaction
    *             the txn information must be included in the lock request.