Posted to commits@hive.apache.org by ek...@apache.org on 2018/06/06 17:15:10 UTC
hive git commit: HIVE-19750: Initialize NEXT_WRITE_ID.NWI_NEXT on converting an existing table to full acid (Eugene Koifman, reviewed by Sankar Hariappan)
Repository: hive
Updated Branches:
refs/heads/master 0992d8292 -> 13fbae573
HIVE-19750: Initialize NEXT_WRITE_ID.NWI_NEXT on converting an existing table to full acid (Eugene Koifman, reviewed by Sankar Hariappan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/13fbae57
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/13fbae57
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/13fbae57
Branch: refs/heads/master
Commit: 13fbae57321f3525cabb326df702430d61c242f9
Parents: 0992d82
Author: Eugene Koifman <ek...@apache.org>
Authored: Wed Jun 6 10:15:04 2018 -0700
Committer: Eugene Koifman <ek...@apache.org>
Committed: Wed Jun 6 10:15:04 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/TestTxnCommands.java | 19 ++--
.../apache/hadoop/hive/ql/TestTxnCommands2.java | 18 ++--
.../apache/hadoop/hive/ql/TestTxnLoadData.java | 39 ++++++---
.../apache/hadoop/hive/ql/TestTxnNoBuckets.java | 73 ++++++++++------
.../llap/acid_vectorization_original.q.out | 14 +--
.../tez/acid_vectorization_original_tez.q.out | 14 +--
.../TransactionalValidationListener.java | 11 +++
.../api/InitializeTableWriteIdsRequest.java | 42 +++++++++
.../hadoop/hive/metastore/txn/TxnHandler.java | 92 ++++++++++----------
.../hadoop/hive/metastore/txn/TxnStore.java | 6 ++
10 files changed, 211 insertions(+), 117 deletions(-)
----------------------------------------------------------------------
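The gist of the patch: when an existing non-acid table is converted to full acid, the metastore now seeds NEXT_WRITE_ID.NWI_NEXT to 10,000,001, reserving write ids 1 through 10,000,000 for assigning ROW__IDs to rows in the table's pre-existing 'original' files. Most of the test churn below is the resulting rename of delta/base directories from write id 1 to 10000001. As a hedged aside, the helper below mirrors the directory-naming pattern visible in the diffs for illustration only; it is not Hive's AcidUtils API:

    // Illustrative sketch of the delta/base naming seen in the diffs below.
    public class AcidDirNames {
      static String delta(long minWriteId, long maxWriteId, int stmtId) {
        return String.format("delta_%07d_%07d_%04d", minWriteId, maxWriteId, stmtId);
      }
      static String base(long writeId) {
        return String.format("base_%07d", writeId);
      }
      public static void main(String[] args) {
        System.out.println(delta(1, 1, 0));                   // delta_0000001_0000001_0000 (before this patch)
        System.out.println(delta(10_000_001, 10_000_001, 0)); // delta_10000001_10000001_0000 (after)
        System.out.println(base(10_000_002));                 // base_10000002
      }
    }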
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index a4d34a7..cd4b670 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -897,12 +897,14 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12"));
Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/000001_0_copy_1"));
Assert.assertTrue(rs.get(3),
- rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
- Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/delta_0000001_0000001_0000/bucket_00001"));
+ rs.get(3).startsWith("{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
+ Assert.assertTrue(rs.get(3), rs.get(3)
+ .endsWith("nonacidorctbl/delta_10000001_10000001_0000/bucket_00001"));
//run Compaction
runStatementOnDriver("alter table "+ TestTxnCommands2.Table.NONACIDORCTBL +" compact 'major'");
TestTxnCommands2.runWorker(hiveConf);
- rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from " + Table.NONACIDORCTBL + " order by ROW__ID");
+ rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from " +
+ Table.NONACIDORCTBL + " order by ROW__ID");
LOG.warn("after compact");
for(String s : rs) {
LOG.warn(s);
@@ -910,16 +912,17 @@ public class TestTxnCommands extends TxnCommandsBaseForTests {
Assert.assertEquals("", 4, rs.size());
Assert.assertTrue(rs.get(0),
rs.get(0).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":0}\t1\t2"));
- Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+ Assert.assertTrue(rs.get(0), rs.get(0).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
Assert.assertTrue(rs.get(1),
rs.get(1).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t5"));
- Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+ Assert.assertTrue(rs.get(1), rs.get(1).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
Assert.assertTrue(rs.get(2),
rs.get(2).startsWith("{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t0\t12"));
- Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+ Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
Assert.assertTrue(rs.get(3),
- rs.get(3).startsWith("{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
- Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_0000001/bucket_00001"));
+ rs.get(3)
+ .startsWith("{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
+ Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/base_10000001/bucket_00001"));
//make sure they are the same before and after compaction
}
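A note on the bucketid constants asserted above (536870912, 536936448, 537001984): these are BucketCodec V1 encodings of (bucket, statement) pairs. The decoder below reflects my reading of that layout (version in the top 3 bits, bucket id at bit 16, statement id in the low 12 bits) and is a standalone sketch, not Hive's BucketCodec API:

    public class BucketIdDecode {
      // Assumed V1 layout: 3-bit version, 12-bit bucket id at bit 16, 12-bit statement id.
      static int bucketId(int code)    { return (code >>> 16) & 0xFFF; }
      static int statementId(int code) { return code & 0xFFF; }
      public static void main(String[] args) {
        for (int code : new int[] {536870912, 536936448, 537001984}) {
          System.out.println(code + " -> bucket " + bucketId(code)
              + ", stmt " + statementId(code)); // buckets 0, 1, 2; stmt 0
        }
      }
    }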
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index a547a84..7c201b6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -379,14 +379,14 @@ public class TestTxnCommands2 {
*/
String[][] expected = {
{"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":4}\t0\t13", "bucket_00001"},
- {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"},
- {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"},
- {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"},
+ {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t0\t15", "bucket_00001"},
+ {"{\"writeid\":10000003,\"bucketid\":536936448,\"rowid\":0}\t0\t17", "bucket_00001"},
+ {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t0\t120", "bucket_00001"},
{"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":1}\t1\t2", "bucket_00001"},
{"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":3}\t1\t4", "bucket_00001"},
{"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":2}\t1\t5", "bucket_00001"},
{"{\"writeid\":0,\"bucketid\":536936448,\"rowid\":6}\t1\t6", "bucket_00001"},
- {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
+ {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t1\t16", "bucket_00001"}
};
Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
for(int i = 0; i < expected.length; i++) {
@@ -773,11 +773,11 @@ public class TestTxnCommands2 {
FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
Arrays.sort(buckets);
if (numDelta == 1) {
- Assert.assertEquals("delta_0000001_0000001_0000", status[i].getPath().getName());
+ Assert.assertEquals("delta_10000001_10000001_0000", status[i].getPath().getName());
Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
} else if (numDelta == 2) {
- Assert.assertEquals("delta_0000002_0000002_0000", status[i].getPath().getName());
+ Assert.assertEquals("delta_10000002_10000002_0000", status[i].getPath().getName());
Assert.assertEquals(1, buckets.length);
Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
}
@@ -786,7 +786,7 @@ public class TestTxnCommands2 {
FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
Arrays.sort(buckets);
if (numDeleteDelta == 1) {
- Assert.assertEquals("delete_delta_0000001_0000001_0000", status[i].getPath().getName());
+ Assert.assertEquals("delete_delta_10000001_10000001_0000", status[i].getPath().getName());
Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
}
@@ -833,7 +833,7 @@ public class TestTxnCommands2 {
Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
} else if (numBase == 2) {
// The new base dir now has two bucket files, since the delta dir has two bucket files
- Assert.assertEquals("base_0000002", status[i].getPath().getName());
+ Assert.assertEquals("base_10000002", status[i].getPath().getName());
Assert.assertEquals(2, buckets.length);
Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
}
@@ -859,7 +859,7 @@ public class TestTxnCommands2 {
status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
(Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.HIDDEN_FILES_PATH_FILTER);
Assert.assertEquals(1, status.length);
- Assert.assertEquals("base_0000002", status[0].getPath().getName());
+ Assert.assertEquals("base_10000002", status[0].getPath().getName());
FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
Arrays.sort(buckets);
Assert.assertEquals(2, buckets.length);
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
index 11c5930..fb88f25 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
@@ -263,12 +263,18 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests {
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t0\t2", "t/000000_0"},
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t0\t4", "t/000000_0"},
//from Load Data into acid converted table
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000001_0000001_0000/000000_0"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000001_0000001_0000/000000_0"},
- {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t2\t2", "t/delta_0000001_0000001_0000/000001_0"},
- {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":1}\t3\t3", "t/delta_0000001_0000001_0000/000001_0"},
- {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":0}\t4\t4", "t/delta_0000001_0000001_0000/000002_0"},
- {"{\"writeid\":1,\"bucketid\":537001984,\"rowid\":1}\t5\t5", "t/delta_0000001_0000001_0000/000002_0"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t2",
+ "t/delta_10000001_10000001_0000/000000_0"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":1}\t3\t4",
+ "t/delta_10000001_10000001_0000/000000_0"},
+ {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":0}\t2\t2",
+ "t/delta_10000001_10000001_0000/000001_0"},
+ {"{\"writeid\":10000001,\"bucketid\":536936448,\"rowid\":1}\t3\t3",
+ "t/delta_10000001_10000001_0000/000001_0"},
+ {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":0}\t4\t4",
+ "t/delta_10000001_10000001_0000/000002_0"},
+ {"{\"writeid\":10000001,\"bucketid\":537001984,\"rowid\":1}\t5\t5",
+ "t/delta_10000001_10000001_0000/000002_0"},
};
checkResult(expected, testQuery, isVectorized, "load data inpath");
@@ -279,9 +285,12 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests {
runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/2/data' overwrite into table T");
String[][] expected2 = new String[][] {
- {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000002/000000_0"},
- {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000002/000000_0"},
- {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000002/000001_0"}
+ {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t5\t6",
+ "t/base_10000002/000000_0"},
+ {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_10000002/000000_0"},
+ {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t8\t8",
+
+ "t/base_10000002/000001_0"}
};
checkResult(expected2, testQuery, isVectorized, "load data inpath overwrite");
@@ -291,10 +300,14 @@ public class TestTxnLoadData extends TxnCommandsBaseForTests {
TestTxnCommands2.runWorker(hiveConf);
String[][] expected3 = new String[][] {
- {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t5\t6", "t/base_0000003/bucket_00000"},
- {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\t7\t8", "t/base_0000003/bucket_00000"},
- {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t8\t8", "t/base_0000003/bucket_00001"},
- {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t9\t9", "t/base_0000003/bucket_00000"}
+ {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t5\t6",
+ "t/base_10000003/bucket_00000"},
+ {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":1}\t7\t8",
+ "t/base_10000003/bucket_00000"},
+ {"{\"writeid\":10000002,\"bucketid\":536936448,\"rowid\":0}\t8\t8",
+ "t/base_10000003/bucket_00001"},
+ {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t9\t9",
+ "t/base_10000003/bucket_00000"}
};
checkResult(expected3, testQuery, isVectorized, "load data inpath overwrite (major)");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index c15c5a6..f071531 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -375,7 +375,7 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/000000_0_copy_1"},
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0"},
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/HIVE_UNION_SUBDIR_16/000000_0"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_0000001_0000001_0000/bucket_00000"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/delta_10000001_10000001_0000/bucket_00000"},
};
rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b, INPUT__FILE__NAME");
checkExpected(rs, expected3,"after converting to acid (no compaction with updates)");
@@ -387,15 +387,24 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
/*Compaction preserves location of rows wrt buckets/tranches (for now)*/
String expected4[][] = {
- {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2", "warehouse/t/base_0000002/bucket_00002"},
- {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4", "warehouse/t/base_0000002/bucket_00002"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60", "warehouse/t/base_0000002/bucket_00000"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t60\t88", "warehouse/t/base_0000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":0}\t1\t2",
+ "warehouse/t/base_10000002/bucket_00002"},
+ {"{\"writeid\":0,\"bucketid\":537001984,\"rowid\":1}\t2\t4",
+ "warehouse/t/base_10000002/bucket_00002"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t5\t6",
+ "warehouse/t/base_10000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t9\t10",
+ "warehouse/t/base_10000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t10\t20",
+ "warehouse/t/base_10000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t12\t12",
+ "warehouse/t/base_10000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t20\t40",
+ "warehouse/t/base_10000002/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t50\t60",
+ "warehouse/t/base_10000002/bucket_00000"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t60\t88",
+ "warehouse/t/base_10000002/bucket_00000"},
};
checkExpected(rs, expected4,"after major compact");
}
@@ -467,15 +476,24 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
* Also check the file name (only) after compaction for completeness
*/
String[][] expected = {
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13", "bucket_00000", "000000_0_copy_1"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000", "bucket_00000"},
- {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000", "bucket_00000"},
- {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000", "bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "bucket_00000", "000000_0"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4", "bucket_00000", "000000_0_copy_1"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5", "bucket_00000", "000000_0_copy_1"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6", "bucket_00000", "000000_0_copy_2"},
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":1}\t1\t16", "bucket_00000", "bucket_00000"}
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t0\t13",
+ "bucket_00000", "000000_0_copy_1"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t0\t15",
+ "bucket_00000", "bucket_00000"},
+ {"{\"writeid\":10000003,\"bucketid\":536870912,\"rowid\":0}\t0\t17",
+ "bucket_00000", "bucket_00000"},
+ {"{\"writeid\":10000002,\"bucketid\":536870912,\"rowid\":0}\t0\t120",
+ "bucket_00000", "bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":0}\t1\t2",
+ "bucket_00000", "000000_0"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t1\t4",
+ "bucket_00000", "000000_0_copy_1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":5}\t1\t5",
+ "bucket_00000", "000000_0_copy_1"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":6}\t1\t6",
+ "bucket_00000", "000000_0_copy_2"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":1}\t1\t16",
+ "bucket_00000", "bucket_00000"}
};
Assert.assertEquals("Unexpected row count before compaction", expected.length, rs.size());
for(int i = 0; i < expected.length; i++) {
@@ -620,7 +638,7 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
query = "select ROW__ID, b from T where b > 0 order by a";
rs = runStatementOnDriver(query);
String[][] expected4 = {
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}","17"},
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}","17"},
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}","4"},
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}","6"},
{"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}","8"},
@@ -641,11 +659,16 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree /Users/ekoifman/dev/hiver
query = "select ROW__ID, a, b, INPUT__FILE__NAME from T where b > 0 order by a, b";
rs = runStatementOnDriver(query);
String[][] expected5 = {//the row__ids are the same after compaction
- {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t1\t17", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8", "warehouse/t/base_0000001/bucket_00000"},
- {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10", "warehouse/t/base_0000001/bucket_00000"}
+ {"{\"writeid\":10000001,\"bucketid\":536870912,\"rowid\":0}\t1\t17",
+ "warehouse/t/base_10000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":1}\t2\t4",
+ "warehouse/t/base_10000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":2}\t5\t6",
+ "warehouse/t/base_10000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":3}\t6\t8",
+ "warehouse/t/base_10000001/bucket_00000"},
+ {"{\"writeid\":0,\"bucketid\":536870912,\"rowid\":4}\t9\t10",
+ "warehouse/t/base_10000001/bucket_00000"}
};
checkExpected(rs, expected5, "After major compaction");
//vectorized because there is INPUT__FILE__NAME
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index dafd5d9..957dfd8 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -665,22 +665,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_orc_bucketed
- Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: ROW__ID
- Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -692,13 +692,13 @@ STAGE PLANS:
keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col1 > 1L) (type: boolean)
- Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
index 01ec132..3c9cf03 100644
--- a/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
+++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_original_tez.q.out
@@ -680,22 +680,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over10k_orc_bucketed_n0
- Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
outputColumnNames: ROW__ID
- Statistics: Num rows: 1241 Data size: 710230 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1247 Data size: 713720 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +
Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 2
Reduce Operator Tree:
@@ -704,13 +704,13 @@ STAGE PLANS:
keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 620 Data size: 52080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 623 Data size: 52332 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col1 > 1L) (type: boolean)
- Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 206 Data size: 17304 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 207 Data size: 17388 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
index c3d99c3..56da115 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hive.metastore.api.InitializeTableWriteIdsRequest;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.events.PreAlterTableEvent;
import org.apache.hadoop.hive.metastore.events.PreCreateTableEvent;
import org.apache.hadoop.hive.metastore.events.PreEventContext;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.slf4j.Logger;
@@ -190,6 +192,15 @@ public final class TransactionalValidationListener extends MetaStorePreEventList
}
}
checkSorted(newTable);
+ if(TxnUtils.isAcidTable(newTable) && !TxnUtils.isAcidTable(oldTable)) {
+ /* we just made an existing table full acid; it wasn't acid before and it passed all checks.
+ Initialize the Write ID sequence so that we can handle assigning ROW__IDs to 'original'
+ files already present in the table. */
+ TxnStore t = TxnUtils.getTxnStore(getConf());
+ //For now, assume no partition has more than 10M files. Perhaps better to count them.
+ t.seedWriteIdOnAcidConversion(new InitializeTableWriteIdsRequest(newTable.getDbName(),
+ newTable.getTableName(), 10000000));
+ }
}
private void checkSorted(Table newTable) throws MetaException {
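The guard above fires only on a non-acid to full-acid transition (TxnUtils.isAcidTable is true for the new table and false for the old one). A minimal standalone rendering of that transition check, assuming the standard 'transactional' table property and ignoring insert-only tables:

    import java.util.Map;

    // Hypothetical stand-in for TxnUtils.isAcidTable(newTable) && !TxnUtils.isAcidTable(oldTable).
    class AcidTransition {
      static boolean becameFullAcid(Map<String, String> oldProps, Map<String, String> newProps) {
        boolean wasAcid = "true".equalsIgnoreCase(oldProps.getOrDefault("transactional", "false"));
        boolean isAcid  = "true".equalsIgnoreCase(newProps.getOrDefault("transactional", "false"));
        return isAcid && !wasAcid;
      }
    }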
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
new file mode 100644
index 0000000..d56b66a
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/api/InitializeTableWriteIdsRequest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.api;
+
+public class InitializeTableWriteIdsRequest {
+ private final String dbName;
+ private final String tblName;
+ private final long seeWriteId;
+ public InitializeTableWriteIdsRequest(String dbName, String tblName, long seeWriteId) {
+ assert dbName != null;
+ assert tblName != null;
+ assert seeWriteId > 1;
+ this.dbName = dbName;
+ this.tblName = tblName;
+ this.seeWriteId = seeWriteId;
+ }
+ public String getDbName() {
+ return dbName;
+ }
+ public String getTblName() {
+ return tblName;
+ }
+
+ public long getSeeWriteId() {
+ return seeWriteId;
+ }
+}
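A hedged usage sketch tying this request class to the listener change earlier in the commit (the db/table values are hypothetical; 10,000,000 is the seed the listener passes):

    import org.apache.hadoop.hive.metastore.api.InitializeTableWriteIdsRequest;

    public class SeedRequestDemo {
      public static void main(String[] args) {
        InitializeTableWriteIdsRequest rqst =
            new InitializeTableWriteIdsRequest("default", "t", 10_000_000L);
        // TxnHandler stores seed + 1, so the first transactional write gets id 10,000,001.
        System.out.println(rqst.getDbName() + "." + rqst.getTblName()
            + " seeded at " + rqst.getSeeWriteId());
      }
    }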
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index d1b0d32..f25e77a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -67,54 +67,7 @@ import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockHandler;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.MetaStoreListenerNotifier;
import org.apache.hadoop.hive.metastore.TransactionalMetaStoreEventListener;
-import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
-import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest;
-import org.apache.hadoop.hive.metastore.api.AddDynamicPartitions;
-import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest;
-import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsResponse;
-import org.apache.hadoop.hive.metastore.api.BasicTxnInfo;
-import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
-import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionRequest;
-import org.apache.hadoop.hive.metastore.api.CompactionResponse;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
-import org.apache.hadoop.hive.metastore.api.DataOperationType;
-import org.apache.hadoop.hive.metastore.api.Database;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest;
-import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsResponse;
-import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
-import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
-import org.apache.hadoop.hive.metastore.api.HiveObjectType;
-import org.apache.hadoop.hive.metastore.api.LockComponent;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
-import org.apache.hadoop.hive.metastore.api.LockState;
-import org.apache.hadoop.hive.metastore.api.LockType;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.OpenTxnRequest;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
-import org.apache.hadoop.hive.metastore.api.Partition;
-import org.apache.hadoop.hive.metastore.api.ReplTblWriteIdStateRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
-import org.apache.hadoop.hive.metastore.api.ShowLocksRequest;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement;
-import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.api.TableValidWriteIds;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnInfo;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
-import org.apache.hadoop.hive.metastore.api.TxnState;
-import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
-import org.apache.hadoop.hive.metastore.api.UnlockRequest;
+import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
import org.apache.hadoop.hive.metastore.datasource.DataSourceProvider;
@@ -1537,7 +1490,50 @@ abstract class TxnHandler implements TxnStore, TxnStore.MutexAPI {
return allocateTableWriteIds(rqst);
}
}
+ @Override
+ public void seedWriteIdOnAcidConversion(InitializeTableWriteIdsRequest rqst)
+ throws MetaException {
+ try {
+ Connection dbConn = null;
+ Statement stmt = null;
+ TxnStore.MutexAPI.LockHandle handle = null;
+ try {
+ lockInternal();
+ dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
+ stmt = dbConn.createStatement();
+
+ handle = getMutexAPI().acquireLock(MUTEX_KEY.WriteIdAllocator.name());
+ //since this is on conversion from non-acid to acid, NEXT_WRITE_ID should not have an entry
+ //for this table; it also has a unique index so any violation of that assumption fails fast
+
+ // The first allocation of a write id adds the table to the NEXT_WRITE_ID meta table.
+ // Write ids start at 1, so we store the number of write ids reserved here plus 1
+ // as the next value
+ String s = "insert into NEXT_WRITE_ID (nwi_database, nwi_table, nwi_next) values ("
+ + quoteString(rqst.getDbName()) + "," + quoteString(rqst.getTblName()) + "," +
+ Long.toString(rqst.getSeeWriteId() + 1) + ")";
+ LOG.debug("Going to execute insert <" + s + ">");
+ stmt.execute(s);
+ LOG.debug("Going to commit");
+ dbConn.commit();
+ } catch (SQLException e) {
+ LOG.debug("Going to rollback");
+ rollbackDBConn(dbConn);
+ checkRetryable(dbConn, e, "seedWriteIdOnAcidConversion(" + rqst + ")");
+ throw new MetaException("Unable to update transaction database "
+ + StringUtils.stringifyException(e));
+ } finally {
+ close(null, stmt, dbConn);
+ if(handle != null) {
+ handle.releaseLocks();
+ }
+ unlockInternal();
+ }
+ } catch (RetryException e) {
+ seedWriteIdOnAcidConversion(rqst);
+ }
+ }
@Override
@RetrySemantics.SafeToRetry
public void performWriteSetGC() {
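For clarity, the string concatenation in seedWriteIdOnAcidConversion boils down to a single metastore insert. A standalone sketch of the statement it builds, with hypothetical db/table values (quoteString in the real code supplies the single quotes):

    public class SeedSqlSketch {
      public static void main(String[] args) {
        String db = "default", tbl = "t"; // hypothetical values
        long seedWriteId = 10_000_000L;   // what the listener passes in this commit
        String s = "insert into NEXT_WRITE_ID (nwi_database, nwi_table, nwi_next) values ('"
            + db + "','" + tbl + "'," + (seedWriteId + 1) + ")";
        System.out.println(s);
        // insert into NEXT_WRITE_ID (nwi_database, nwi_table, nwi_next) values ('default','t',10000001)
      }
    }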
http://git-wip-us.apache.org/repos/asf/hive/blob/13fbae57/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
index 4695f0d..ef447e1 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnStore.java
@@ -154,6 +154,12 @@ public interface TxnStore extends Configurable {
throws NoSuchTxnException, TxnAbortedException, MetaException;
/**
+ * Called on conversion of an existing table to full acid. Sets the initial write ID to a
+ * high enough value that unique ROW__IDs can be assigned to data in pre-existing files.
+ */
+ void seedWriteIdOnAcidConversion(InitializeTableWriteIdsRequest rqst) throws MetaException;
+
+ /**
* Obtain a lock.
* @param rqst information on the lock to obtain. If the requester is part of a transaction
* the txn information must be included in the lock request.