You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2022/09/06 13:04:05 UTC

[GitHub] [hive] lcspinter commented on a diff in pull request #3552: HIVE-26498: Implement MV maintenance with Iceberg sources using full rebuild

lcspinter commented on code in PR #3552:
URL: https://github.com/apache/hive/pull/3552#discussion_r963624074


##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -977,4 +977,11 @@ public Configuration get() {
       return conf;
     }
   }
+
+  @Override
+  public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {

Review Comment:
   Why cannot we use long as return type? 



##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java:
##########
@@ -489,4 +489,13 @@ default void validateSinkDesc(FileSinkDesc sinkDesc) throws SemanticException {
    */
   default void executeOperation(org.apache.hadoop.hive.ql.metadata.Table table, AlterTableExecuteSpec executeSpec) {
   }
+
+  /**
+   * Query the unique snapshot id of the passed table.
+   * @param table - {@link org.apache.hadoop.hive.ql.metadata.Table} which snapshot id should be returned.
+   * @return String representation of the snapshotId or null if not supported.
+   */
+  default String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table table) {

Review Comment:
   I would change this to `getCurrentSnapshotId`



##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -32,16 +36,19 @@
 import static java.util.Collections.unmodifiableSet;
 
 public class MaterializedViewMetadata {
+  private static final Logger LOG = LoggerFactory.getLogger(MaterializedViewMetadata.class);
+
   final CreationMetadata creationMetadata;
 
   MaterializedViewMetadata(CreationMetadata creationMetadata) {
     this.creationMetadata = creationMetadata;
   }
 
   public MaterializedViewMetadata(
-          String catalogName, String dbName, String mvName, Set<SourceTable> sourceTables, String validTxnList) {
+          String catalogName, String dbName, String mvName, Set<SourceTable> sourceTables,
+          MaterializationSnapshot snapshot) {
     this.creationMetadata = new CreationMetadata(catalogName, dbName, mvName, toFullTableNames(sourceTables));
-    this.creationMetadata.setValidTxnList(validTxnList);
+    this.creationMetadata.setValidTxnList(snapshot.asJsonString());

Review Comment:
   This is not a List anymore, but rather an object. Maybe we should consider renaming it. 



##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() {
     return unmodifiableList(creationMetadata.getSourceTables());
   }
 
-  public String getValidTxnList() {
-    return creationMetadata.getValidTxnList();
+  public MaterializationSnapshot getSnapshot() {

Review Comment:
   Javadoc. Please cover what happens in case of native ACID table and non-native table. 



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -98,7 +105,22 @@ public static Table extractTable(RelOptMaterialization materialization) {
    * materialized view definition uses external tables.
    */
   public static Boolean isOutdatedMaterializedView(
-      String validTxnsList, HiveTxnManager txnMgr,
+          String validTxnsList, HiveTxnManager txnMgr, Hive db,
+          Set<TableName> tablesUsed, Table materializedViewTable) throws HiveException {
+
+    MaterializedViewMetadata mvMetadata = materializedViewTable.getMVMetadata();
+    MaterializationSnapshot snapshot = mvMetadata.getSnapshot();
+
+    if (snapshot.getTableSnapshots() != null && !snapshot.getTableSnapshots().isEmpty()) {

Review Comment:
   snapshot can be null



##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -977,4 +977,11 @@ public Configuration get() {
       return conf;
     }
   }
+
+  @Override
+  public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
+    Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties());
+    return Long.toString(table.currentSnapshot().sequenceNumber());

Review Comment:
   `table.currentSnashot()` can be null, if the table is empty.



##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() {
     return unmodifiableList(creationMetadata.getSourceTables());
   }
 
-  public String getValidTxnList() {
-    return creationMetadata.getValidTxnList();
+  public MaterializationSnapshot getSnapshot() {
+    if (creationMetadata.getValidTxnList() == null || creationMetadata.getValidTxnList().isEmpty()) {
+      LOG.debug("Could not obtain materialization snapshot of materialized view {}.{}",
+          creationMetadata.getDbName(), creationMetadata.getDbName());

Review Comment:
   `getDbName` is passed twice



##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+  public static MaterializationSnapshot fromJson(String jsonString) {
+    try {
+      return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class);
+    } catch (JsonProcessingException e) {
+      // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList
+      return new MaterializationSnapshot(jsonString, null);
+    }
+  }
+
+  // snapshot of native ACID tables
+  private String validTxnList;
+  // snapshot of non-native ACID tables. Key is the fully qualified name of the table.

Review Comment:
   Materialized views are only supported for iceberg V2 tables? If not, we should change this java doc.



##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+  public static MaterializationSnapshot fromJson(String jsonString) {
+    try {
+      return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class);
+    } catch (JsonProcessingException e) {
+      // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList
+      return new MaterializationSnapshot(jsonString, null);
+    }
+  }
+
+  // snapshot of native ACID tables
+  private String validTxnList;
+  // snapshot of non-native ACID tables. Key is the fully qualified name of the table.
+  // Value is the unique id of the snapshot provided by the table's storage HiveStorageHandler
+  private Map<String, String> tableSnapshots;
+
+  private MaterializationSnapshot() {
+  }
+
+  public MaterializationSnapshot(String validTxnList, Map<String, String> tableSnapshots) {
+    this.validTxnList = validTxnList;
+    this.tableSnapshots = tableSnapshots;
+  }
+
+  public String asJsonString() {

Review Comment:
   Few lines of javadoc wouldn't hurt



##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc.q:
##########
@@ -0,0 +1,35 @@
+-- SORT_QUERY_RESULTS
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='2');
+
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54);
+
+select * from tbl_ice;
+
+
+create materialized view mat1 as
+select b, c from tbl_ice where c > 52;

Review Comment:
   How does this work with time travel queries? We should add a few tests covering that as well.



##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+  public static MaterializationSnapshot fromJson(String jsonString) {
+    try {
+      return new ObjectMapper().readValue(jsonString, MaterializationSnapshot.class);
+    } catch (JsonProcessingException e) {
+      // this is not a jsonString, fall back to treating it as ValidTxnWriteIdList
+      return new MaterializationSnapshot(jsonString, null);
+    }
+  }
+
+  // snapshot of native ACID tables
+  private String validTxnList;
+  // snapshot of non-native ACID tables. Key is the fully qualified name of the table.
+  // Value is the unique id of the snapshot provided by the table's storage HiveStorageHandler
+  private Map<String, String> tableSnapshots;
+
+  private MaterializationSnapshot() {
+  }
+
+  public MaterializationSnapshot(String validTxnList, Map<String, String> tableSnapshots) {

Review Comment:
   Is `validTxnList` and `tableSnapshots` are mandatory parameters? If not, we can use two separate constructors or a builder pattern. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org