You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/03/27 01:02:58 UTC

hive git commit: HIVE-19017: Add util function to determine if 2 ValidWriteIdLists are at the same committed ID (Jason Dere, reviewed by Jesus Camacho Rodriguez)

Repository: hive
Updated Branches:
  refs/heads/master 42ca4d770 -> c9bb5d389


HIVE-19017: Add util function to determine if 2 ValidWriteIdLists are at the same committed ID (Jason Dere, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c9bb5d38
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c9bb5d38
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c9bb5d38

Branch: refs/heads/master
Commit: c9bb5d38925b4a5052ca7fc23527fd2571f5a2ff
Parents: 42ca4d7
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Mar 26 18:02:05 2018 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Mar 26 18:02:05 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hive/common/util/TxnIdUtils.java |  78 +++++++++++++
 .../apache/hive/common/util/TestTxnIdUtils.java | 117 +++++++++++++++++++
 2 files changed, 195 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c9bb5d38/storage-api/src/java/org/apache/hive/common/util/TxnIdUtils.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/TxnIdUtils.java b/storage-api/src/java/org/apache/hive/common/util/TxnIdUtils.java
new file mode 100644
index 0000000..17f3777
--- /dev/null
+++ b/storage-api/src/java/org/apache/hive/common/util/TxnIdUtils.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.common.util;
+
+import org.apache.hadoop.hive.common.ValidWriteIdList;
+
+public class TxnIdUtils {
+
+  /**
+   * Checks whether two write ID lists of the same table describe the same set of committed IDs.
+   * Table names are compared case-insensitively; the list with the lower high watermark is
+   * treated as the older snapshot (on a tie, {@code b} is the older one).
+   * @param a first write ID list
+   * @param b second write ID list
+   * @return true if both lists are for the same table and at an equivalent commit point
+   */
+  public static boolean checkEquivalentWriteIds(ValidWriteIdList a, ValidWriteIdList b) {
+    if (!a.getTableName().equalsIgnoreCase(b.getTableName())) {
+      return false;
+    }
+    boolean aIsOlder = a.getHighWatermark() < b.getHighWatermark();
+    ValidWriteIdList older = aIsOlder ? a : b;
+    ValidWriteIdList newer = aIsOlder ? b : a;
+    return checkEquivalentCommittedIds(
+        older.getHighWatermark(), older.getInvalidWriteIds(),
+        newer.getHighWatermark(), newer.getInvalidWriteIds());
+  }
+
+  /**
+   * Compares the high watermarks and invalid ID lists of an older and a newer snapshot.
+   * Only those inputs are examined; no min open ID is consulted. This can also be used for
+   * ValidTxnList as well as ValidWriteIdList.
+   * @param oldHWM high watermark of the older snapshot; callers pass a value of at most newHWM
+   * @param oldInvalidIds invalid IDs of the older snapshot, expected to be sorted ascending
+   * @param newHWM high watermark of the newer snapshot
+   * @param newInvalidIds invalid IDs of the newer snapshot, expected to be sorted ascending
+   * @return true if the two snapshots are judged to be at the same commit point
+   */
+  private static boolean checkEquivalentCommittedIds(
+      long oldHWM, long[] oldInvalidIds,
+      long newHWM, long[] newInvalidIds) {
+    int oldCount = oldInvalidIds.length;
+    if (newInvalidIds.length < oldCount) {
+      return false;
+    }
+    // Every ID invalid in the older snapshot must still be invalid in the newer one. Both
+    // arrays are sorted, so the older array has to be a prefix of the newer array.
+    for (int idx = 0; idx < oldCount; ++idx) {
+      if (oldInvalidIds[idx] != newInvalidIds[idx]) {
+        return false;
+      }
+    }
+    // No ID between oldHWM and newHWM may be committed, so the newer list must hold exactly
+    // (newHWM - oldHWM) more invalid entries than the older list.
+    if (newInvalidIds.length - oldCount != newHWM - oldHWM) {
+      return false;
+    }
+    // Those additional entries must all lie above oldHWM. One at or below it is an ID that was
+    // valid in the older snapshot but is invalid in the newer one, which the count check alone
+    // cannot detect. The arrays are sorted, so testing the first additional entry is enough.
+    return oldCount == newInvalidIds.length || newInvalidIds[oldCount] > oldHWM;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/c9bb5d38/storage-api/src/test/org/apache/hive/common/util/TestTxnIdUtils.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hive/common/util/TestTxnIdUtils.java b/storage-api/src/test/org/apache/hive/common/util/TestTxnIdUtils.java
new file mode 100644
index 0000000..3d8f329
--- /dev/null
+++ b/storage-api/src/test/org/apache/hive/common/util/TestTxnIdUtils.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.common.util;
+
+import java.util.BitSet;
+
+import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
+import org.junit.Test;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class TestTxnIdUtils {
+
+  /** Shorthand for the call under test. */
+  private static boolean equiv(ValidReaderWriteIdList x, ValidReaderWriteIdList y) {
+    return TxnIdUtils.checkEquivalentWriteIds(x, y);
+  }
+
+  /** Lists that differ in high watermark, invalid IDs or table name. */
+  @Test
+  public void testCheckEquivalentWriteIds() throws Exception {
+    // Baseline: IDs 1-5 all uncommitted. A list is equivalent to itself.
+    ValidReaderWriteIdList base = new ValidReaderWriteIdList("default.table1",
+        new long[] {1,2,3,4,5}, new BitSet(), 5, 1);
+    assertTrue(equiv(base, base));
+    // Baseline plus further uncommitted IDs 6-8: equivalent in either argument order.
+    ValidReaderWriteIdList extended = new ValidReaderWriteIdList("default.table1",
+        new long[] {1,2,3,4,5,6,7,8}, new BitSet(), 8, 1);
+    assertTrue(equiv(base, extended));
+    assertTrue(equiv(extended, base));
+    // ID 1 committed, IDs 2-8 still uncommitted.
+    ValidReaderWriteIdList committed1 = new ValidReaderWriteIdList("default.table1",
+        new long[] {2,3,4,5,6,7,8}, new BitSet(), 8, 2);
+    assertFalse(equiv(base, committed1));
+    assertFalse(equiv(committed1, extended));
+    // ID 5 committed, every other ID up to 8 still uncommitted.
+    ValidReaderWriteIdList committed5 = new ValidReaderWriteIdList("default.table1",
+        new long[] {1,2,3,4,6,7,8}, new BitSet(), 8, 1);
+    assertFalse(equiv(base, committed5));
+    assertFalse(equiv(committed5, extended));
+    // IDs 5 and 8 committed (neither appears in the list), the others still uncommitted.
+    ValidReaderWriteIdList committed5And8 = new ValidReaderWriteIdList("default.table1",
+        new long[] {1,2,3,4,6,7}, new BitSet(), 8, 1);
+    assertFalse(equiv(base, committed5And8));
+    assertFalse(equiv(committed5And8, extended));
+    // Same IDs as the baseline, but for a different table.
+    ValidReaderWriteIdList otherTable = new ValidReaderWriteIdList("default.tab2",
+        new long[] {1,2,3,4,5}, new BitSet(), 5, 1);
+    assertFalse(equiv(base, otherTable));
+    // Same table, with a high watermark far beyond the baseline.
+    ValidReaderWriteIdList farAhead = new ValidReaderWriteIdList("default.table1",
+        new long[] {100, 101, 105}, new BitSet(), 105, 100);
+    assertFalse(equiv(base, farAhead));
+    // High watermark 0 and an empty list: no activity on the table yet.
+    ValidReaderWriteIdList untouched = new ValidReaderWriteIdList("default.table1",
+        new long[] {}, new BitSet(), 0);
+    assertTrue(equiv(base, untouched));
+    assertTrue(equiv(untouched, extended));
+    assertFalse(equiv(untouched, committed1));
+    assertFalse(equiv(untouched, farAhead));
+    // High watermark 5 and an empty list, built twice to compare distinct instances.
+    ValidReaderWriteIdList allDoneA = new ValidReaderWriteIdList("default.table1",
+        new long[] {}, new BitSet(), 5);
+    ValidReaderWriteIdList allDoneB = new ValidReaderWriteIdList("default.table1",
+        new long[] {}, new BitSet(), 5);
+    assertTrue(equiv(allDoneA, allDoneB));
+    assertFalse(equiv(untouched, allDoneA));
+    assertFalse(equiv(allDoneA, untouched));
+    assertFalse(equiv(allDoneA, base));
+    assertFalse(equiv(allDoneA, extended));
+    assertFalse(equiv(allDoneA, farAhead));
+  }
+
+  /** Lists that gain IDs 6-8 above a watermark of 5, then one that no longer lists them. */
+  @Test
+  public void testCheckEquivalentWriteIds2() throws Exception {
+    ValidReaderWriteIdList hwm5 = new ValidReaderWriteIdList("default.table1",
+        new long[] {}, new BitSet(), 5);
+    ValidReaderWriteIdList hwm6 = new ValidReaderWriteIdList("default.table1",
+        new long[] {6}, BitSet.valueOf(new byte[]{1}), 6);
+    ValidReaderWriteIdList hwm7 = new ValidReaderWriteIdList("default.table1",
+        new long[] {6,7}, BitSet.valueOf(new byte[]{1,0}), 7, 7);
+    ValidReaderWriteIdList hwm8 = new ValidReaderWriteIdList("default.table1",
+        new long[] {6,7,8}, BitSet.valueOf(new byte[]{1,0,1}), 8, 7);
+
+    assertTrue(equiv(hwm5, hwm6));
+    assertTrue(equiv(hwm5, hwm7));
+    assertTrue(equiv(hwm5, hwm8));
+    assertTrue(equiv(hwm6, hwm7));
+    assertTrue(equiv(hwm6, hwm8));
+
+    // Once IDs 6,7,8 are all aborted and their metadata is cleaned up, the record of the
+    // aborted IDs is gone. The resulting list cannot be shown to have the same commit state
+    // as the earlier lists, so every comparison against it is expected to fail.
+    ValidReaderWriteIdList cleaned = new ValidReaderWriteIdList("default.table1",
+        new long[] {}, new BitSet(), 8);
+    assertFalse(equiv(hwm5, cleaned));
+    assertFalse(equiv(hwm6, cleaned));
+    assertFalse(equiv(hwm7, cleaned));
+    assertFalse(equiv(hwm8, cleaned));
+  }
+}