You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2020/08/10 14:42:31 UTC

[GitHub] [hive] klcopp commented on a change in pull request #1384: HIVE-24021: Read insert-only tables truncated by Impala correctly

klcopp commented on a change in pull request #1384:
URL: https://github.com/apache/hive/pull/1384#discussion_r467952862



##########
File path: ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommandsForMmTable.java
##########
@@ -481,6 +482,61 @@ public void testOperationsOnCompletedTxnComponentsForMmTable() throws Exception
     verifyDirAndResult(0, true);
   }
 
+  /**
+   * Impala truncates insert-only tables by writing a base directory (like insert overwrite) containing an empty file
+   * named "_empty". Generally, files beginning with an underscore are hidden in Hive, so here we make sure that Hive
+   * reads these bases correctly.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testImpalaTruncatedMmTable() throws Exception {
+    FileSystem fs = FileSystem.get(hiveConf);
+    FileStatus[] status;
+
+    Path tblLocation = new Path(TEST_WAREHOUSE_DIR + "/" +
+        (TableExtended.MMTBL).toString().toLowerCase());
+
+    // 1. Insert two rows into an MM table
+    runStatementOnDriver("insert into " + TableExtended.MMTBL + "(a,b) values(1,2)");
+    runStatementOnDriver("insert into " + TableExtended.MMTBL + "(a,b) values(3,4)");
+    status = fs.listStatus(tblLocation, FileUtils.STAGING_DIR_PATH_FILTER);
+    // There should be 2 delta dirs in the location
+    Assert.assertEquals(2, status.length);
+    for (int i = 0; i < status.length; i++) {
+      Assert.assertTrue(status[i].getPath().getName().matches("delta_.*"));
+    }
+
+    // 2. Simulate Impala truncating the table: write a base dir (base_0000003) containing an empty file.
+    // Hive will name the empty file "000000_0"
+    runStatementOnDriver("insert overwrite  table " + TableExtended.MMTBL + " select * from "
+        + TableExtended.MMTBL + " where 1=2");
+    status = fs.listStatus(tblLocation, FileUtils.STAGING_DIR_PATH_FILTER);
+    // There should be 2 delta dirs, plus 1 base dir in the location
+    Assert.assertEquals(3, status.length);
+    int baseCount = 0;
+    int deltaCount = 0;
+    for (int i = 0; i < status.length; i++) {
+      String dirName = status[i].getPath().getName();
+      if (dirName.matches("delta_.*")) {
+        deltaCount++;
+      } else {
+        baseCount++;
+      }
+    }
+    Assert.assertEquals(2, deltaCount);
+    Assert.assertEquals(1, baseCount);
+
+    // rename empty file to "_empty"
+    Path basePath = new Path(tblLocation, "base_0000003");
+    Assert.assertTrue("Rename failed",
+        fs.rename(new Path(basePath, "000000_0"), new Path(basePath, "_empty")));
+
+    // 3. Verify query result. Selecting from a truncated table should return nothing.
+    List<String> rs = runStatementOnDriver("select a,b from " + TableExtended.MMTBL + " order by a,b");
+    Assert.assertEquals(Collections.emptyList(), rs);
+  }

Review comment:
       Great idea!




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org