You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2020/04/08 04:00:08 UTC

[GitHub] [incubator-hudi] bvaradar opened a new pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

bvaradar opened a new pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495
 
 
   [HUDI-770] Organize upsert/insert API implementation under a single package

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252828
 
 

 ##########
 File path: hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java
 ##########
 @@ -27,23 +27,25 @@
  */
 public enum WriteOperationType {
   // directly insert
-  INSERT("insert"),
-  INSERT_PREPPED("insert_prepped"),
+  INSERT("insert", false),
+  INSERT_PREPPED("insert_prepped", false),
   // update and insert
-  UPSERT("upsert"),
-  UPSERT_PREPPED("upsert_prepped"),
+  UPSERT("upsert", true),
+  UPSERT_PREPPED("upsert_prepped", true),
   // bulk insert
-  BULK_INSERT("bulk_insert"),
-  BULK_INSERT_PREPPED("bulk_insert_prepped"),
+  BULK_INSERT("bulk_insert", false),
+  BULK_INSERT_PREPPED("bulk_insert_prepped", false),
   // delete
-  DELETE("delete"),
+  DELETE("delete", true),
   // used for old version
-  UNKNOWN("unknown");
+  UNKNOWN("unknown", false);
 
   private final String value;
+  private final boolean isUpsert;
 
-  WriteOperationType(String value) {
+  WriteOperationType(String value, boolean isUpsert) {
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252832
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/AbstractBaseCommitActionExecutor.java
 ##########
 @@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.client.SparkTaskContextSupplier;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.client.utils.SparkConfigUtils;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieInstant.State;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieCommitException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.HoodieUpsertException;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.WorkloadProfile;
+import org.apache.hudi.table.WorkloadStat;
+import org.apache.hudi.table.action.BaseActionExecutor;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.Partitioner;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.storage.StorageLevel;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import scala.Tuple2;
+
+public abstract class AbstractBaseCommitActionExecutor<T extends HoodieRecordPayload<T>>
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-610761048
 
 
   # [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=h1) Report
   > Merging [#1495](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=desc) into [master](https://codecov.io/gh/apache/incubator-hudi/commit/c0f96e072650d39433929c6efe3bc0b2cd882a39&el=desc) will **decrease** coverage by `0.02%`.
   > The diff coverage is `81.48%`.
   
   [![Impacted file tree graph](https://codecov.io/gh/apache/incubator-hudi/pull/1495/graphs/tree.svg?width=650&height=150&src=pr&token=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree)
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #1495      +/-   ##
   ============================================
   - Coverage     72.25%   72.22%   -0.03%     
     Complexity      289      289              
   ============================================
     Files           338      365      +27     
     Lines         15946    16225     +279     
     Branches       1624     1629       +5     
   ============================================
   + Hits          11521    11719     +198     
   - Misses         3697     3774      +77     
   - Partials        728      732       +4     
   ```
   
   
   | [Impacted Files](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree) | Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | [...c/main/java/org/apache/hudi/table/HoodieTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllVGFibGUuamF2YQ==) | `79.64% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...g/apache/hudi/table/action/BaseActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL0Jhc2VBY3Rpb25FeGVjdXRvci5qYXZh) | `100.00% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...e/action/commit/MergeOnReadBulkInsertExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZEJ1bGtJbnNlcnRFeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...n/commit/MergeOnReadBulkInsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZEJ1bGtJbnNlcnRQcmVwcGVkRXhlY3V0b3IuamF2YQ==) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...ction/commit/MergeOnReadInsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZEluc2VydFByZXBwZWRFeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...ction/commit/MergeOnReadUpsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZFVwc2VydFByZXBwZWRFeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../org/apache/hudi/table/HoodieMergeOnReadTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllTWVyZ2VPblJlYWRUYWJsZS5qYXZh) | `73.33% <22.22%> (-9.80%)` | `0.00 <0.00> (ø)` | |
   | [...e/action/commit/CopyOnWriteBulkInsertExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9Db3B5T25Xcml0ZUJ1bGtJbnNlcnRFeGVjdXRvci5qYXZh) | `55.55% <55.55%> (ø)` | `0.00 <0.00> (?)` | |
   | [...n/commit/CopyOnWriteBulkInsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9Db3B5T25Xcml0ZUJ1bGtJbnNlcnRQcmVwcGVkRXhlY3V0b3IuamF2YQ==) | `55.55% <55.55%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../apache/hudi/table/action/commit/DeleteHelper.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9EZWxldGVIZWxwZXIuamF2YQ==) | `64.28% <64.28%> (ø)` | `0.00 <0.00> (?)` | |
   | ... and [57 more](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree-more) | |
   
   ------
   
   [Continue to review full report at Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=continue).
   > **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
   > Powered by [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=footer). Last update [c0f96e0...ce8cc69](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=lastupdated). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] codecov-io commented on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
codecov-io commented on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-610761048
 
 
   # [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=h1) Report
   > Merging [#1495](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=desc) into [master](https://codecov.io/gh/apache/incubator-hudi/commit/b5d093a21bbb19f164fbc549277188f2151232a8&el=desc) will **decrease** coverage by `0.61%`.
   > The diff coverage is `89.09%`.
   
   [![Impacted file tree graph](https://codecov.io/gh/apache/incubator-hudi/pull/1495/graphs/tree.svg?width=650&height=150&src=pr&token=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree)
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #1495      +/-   ##
   ============================================
   - Coverage     71.53%   70.92%   -0.62%     
   - Complexity      261      290      +29     
   ============================================
     Files           336      348      +12     
     Lines         15744    16291     +547     
     Branches       1610     1660      +50     
   ============================================
   + Hits          11263    11554     +291     
   - Misses         3760     3998     +238     
   - Partials        721      739      +18     
   ```
   
   
   | [Impacted Files](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree) | Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | [...c/main/java/org/apache/hudi/table/HoodieTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllVGFibGUuamF2YQ==) | `79.64% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...g/apache/hudi/table/action/BaseActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL0Jhc2VBY3Rpb25FeGVjdXRvci5qYXZh) | `100.00% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...action/commit/CopyOnWriteCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9Db3B5T25Xcml0ZUNvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `64.44% <64.44%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../apache/hudi/client/AbstractHoodieWriteClient.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY2xpZW50L0Fic3RyYWN0SG9vZGllV3JpdGVDbGllbnQuamF2YQ==) | `73.77% <66.66%> (-0.39%)` | `0.00 <0.00> (ø)` | |
   | [...ction/commit/AbstractBaseCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9BYnN0cmFjdEJhc2VDb21taXRBY3Rpb25FeGVjdXRvci5qYXZh) | `84.68% <84.68%> (ø)` | `0.00 <0.00> (?)` | |
   | [...action/commit/MergeOnReadCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZENvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `88.88% <88.88%> (ø)` | `0.00 <0.00> (?)` | |
   | [...le/action/commit/MergeOnReadUpsertPartitioner.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZFVwc2VydFBhcnRpdGlvbmVyLmphdmE=) | `92.15% <92.15%> (ø)` | `0.00 <0.00> (?)` | |
   | [...he/hudi/table/action/commit/UpsertPartitioner.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9VcHNlcnRQYXJ0aXRpb25lci5qYXZh) | `94.96% <94.96%> (ø)` | `0.00 <0.00> (?)` | |
   | [...java/org/apache/hudi/client/HoodieWriteClient.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY2xpZW50L0hvb2RpZVdyaXRlQ2xpZW50LmphdmE=) | `66.81% <100.00%> (-2.42%)` | `0.00 <0.00> (ø)` | |
   | [.../org/apache/hudi/table/HoodieCopyOnWriteTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllQ29weU9uV3JpdGVUYWJsZS5qYXZh) | `31.41% <100.00%> (-57.87%)` | `0.00 <0.00> (ø)` | |
   | ... and [31 more](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree-more) | |
   
   ------
   
   [Continue to review full report at Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=continue).
   > **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
   > Powered by [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=footer). Last update [b5d093a...cc857d0](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=lastupdated). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407272545
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeOnReadCommitActionExecutor.java
 ##########
 @@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieUpsertException;
+import org.apache.hudi.execution.MergeOnReadLazyInsertIterable;
+import org.apache.hudi.io.HoodieAppendHandle;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.WorkloadProfile;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.Partitioner;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+public abstract class MergeOnReadCommitActionExecutor<T extends HoodieRecordPayload<T>>
 
 Review comment:
   classes renamed as `MergeOnReadDeltaCommitActionExecutor`?  (or we can drop the `MergeOnRead` even since delta commits only apply to MOR and the forking happens clearly above at teh HoodieTable level)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252830
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionResult.java
 ##########
 @@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import java.util.List;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.spark.api.java.JavaRDD;
+
+import java.time.Duration;
+
+/**
+ * Contains metadata, write-statuses and latency times corresponding to a commit/delta-commit action.
+ */
+public class CommitActionResult {
+
+  private JavaRDD<WriteStatus> writeStatuses;
+  private Duration indexUpdateDuration;
 
 Review comment:
   This is fixed as as part of creating separate executors for each operation.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407320570
 
 

 ##########
 File path: hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java
 ##########
 @@ -69,4 +69,8 @@ public static WriteOperationType fromValue(String value) {
         throw new HoodieException("Invalid value of Type.");
     }
   }
+
+  public static boolean isUpsert(WriteOperationType operationType) {
 
 Review comment:
   Renamed to isChangingRecords. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405954276
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
 ##########
 @@ -455,49 +454,15 @@ private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, Hood
   }
 
   private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String instantTime,
-      HoodieTable<T> hoodieTable, final boolean isUpsert) {
-
-    // Cache the tagged records, so we don't end up computing both
-    // TODO: Consistent contract in HoodieWriteClient regarding preppedRecord storage level handling
-    if (preppedRecords.getStorageLevel() == StorageLevel.NONE()) {
-      preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER());
-    } else {
-      LOG.info("RDD PreppedRecords was persisted at: " + preppedRecords.getStorageLevel());
-    }
-
-    WorkloadProfile profile = null;
-    if (hoodieTable.isWorkloadProfileNeeded()) {
-      profile = new WorkloadProfile(preppedRecords);
-      LOG.info("Workload profile :" + profile);
-      saveWorkloadProfileMetadataToInflight(profile, hoodieTable, instantTime);
-    }
-
-    // partition using the insert partitioner
-    final Partitioner partitioner = getPartitioner(hoodieTable, isUpsert, profile);
-    JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
-    JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
-      if (isUpsert) {
-        return hoodieTable.handleUpsertPartition(instantTime, partition, recordItr, partitioner);
-      } else {
-        return hoodieTable.handleInsertPartition(instantTime, partition, recordItr, partitioner);
-      }
-    }, true).flatMap(List::iterator);
-
-    return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, instantTime);
-  }
-
-  private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
-    if (isUpsert) {
-      return table.getUpsertPartitioner(profile, jsc);
-    } else {
-      return table.getInsertPartitioner(profile, jsc);
+      HoodieTable<T> hoodieTable) {
+    CommitActionResult result = hoodieTable.ingest(jsc, preppedRecords, instantTime, getOperationType());
 
 Review comment:
   and can we return `HoodieCommitMetadata` instead? I guess the issue is we have to pass this to the caller (deltastreamer or datasource) to decide whether to commit or not? 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-612752623
 
 
   @vinothchandar : Addressed comments. Will merge once the CI passes.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-610761048
 
 
   # [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=h1) Report
   > Merging [#1495](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=desc) into [master](https://codecov.io/gh/apache/incubator-hudi/commit/b5d093a21bbb19f164fbc549277188f2151232a8&el=desc) will **decrease** coverage by `0.61%`.
   > The diff coverage is `89.09%`.
   
   [![Impacted file tree graph](https://codecov.io/gh/apache/incubator-hudi/pull/1495/graphs/tree.svg?width=650&height=150&src=pr&token=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree)
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #1495      +/-   ##
   ============================================
   - Coverage     71.53%   70.92%   -0.62%     
   - Complexity      261      290      +29     
   ============================================
     Files           336      348      +12     
     Lines         15744    16291     +547     
     Branches       1610     1660      +50     
   ============================================
   + Hits          11263    11554     +291     
   - Misses         3760     3998     +238     
   - Partials        721      739      +18     
   ```
   
   
   | [Impacted Files](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree) | Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | [...c/main/java/org/apache/hudi/table/HoodieTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllVGFibGUuamF2YQ==) | `79.64% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...g/apache/hudi/table/action/BaseActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL0Jhc2VBY3Rpb25FeGVjdXRvci5qYXZh) | `100.00% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...action/commit/CopyOnWriteCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9Db3B5T25Xcml0ZUNvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `64.44% <64.44%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../apache/hudi/client/AbstractHoodieWriteClient.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY2xpZW50L0Fic3RyYWN0SG9vZGllV3JpdGVDbGllbnQuamF2YQ==) | `73.77% <66.66%> (-0.39%)` | `0.00 <0.00> (ø)` | |
   | [...ction/commit/AbstractBaseCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9BYnN0cmFjdEJhc2VDb21taXRBY3Rpb25FeGVjdXRvci5qYXZh) | `84.68% <84.68%> (ø)` | `0.00 <0.00> (?)` | |
   | [...action/commit/MergeOnReadCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZENvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `88.88% <88.88%> (ø)` | `0.00 <0.00> (?)` | |
   | [...le/action/commit/MergeOnReadUpsertPartitioner.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZFVwc2VydFBhcnRpdGlvbmVyLmphdmE=) | `92.15% <92.15%> (ø)` | `0.00 <0.00> (?)` | |
   | [...he/hudi/table/action/commit/UpsertPartitioner.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9VcHNlcnRQYXJ0aXRpb25lci5qYXZh) | `94.96% <94.96%> (ø)` | `0.00 <0.00> (?)` | |
   | [...java/org/apache/hudi/client/HoodieWriteClient.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvY2xpZW50L0hvb2RpZVdyaXRlQ2xpZW50LmphdmE=) | `66.81% <100.00%> (-2.42%)` | `0.00 <0.00> (ø)` | |
   | [.../org/apache/hudi/table/HoodieCopyOnWriteTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllQ29weU9uV3JpdGVUYWJsZS5qYXZh) | `31.41% <100.00%> (-57.87%)` | `0.00 <0.00> (ø)` | |
   | ... and [31 more](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree-more) | |
   
   ------
   
   [Continue to review full report at Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=continue).
   > **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
   > Powered by [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=footer). Last update [b5d093a...cc857d0](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=lastupdated). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-610761048
 
 
   # [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=h1) Report
   > Merging [#1495](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=desc) into [master](https://codecov.io/gh/apache/incubator-hudi/commit/c0f96e072650d39433929c6efe3bc0b2cd882a39&el=desc) will **decrease** coverage by `0.07%`.
   > The diff coverage is `81.48%`.
   
   [![Impacted file tree graph](https://codecov.io/gh/apache/incubator-hudi/pull/1495/graphs/tree.svg?width=650&height=150&src=pr&token=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree)
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #1495      +/-   ##
   ============================================
   - Coverage     72.25%   72.17%   -0.08%     
     Complexity      289      289              
   ============================================
     Files           338      365      +27     
     Lines         15946    16228     +282     
     Branches       1624     1632       +8     
   ============================================
   + Hits          11521    11712     +191     
   - Misses         3697     3783      +86     
   - Partials        728      733       +5     
   ```
   
   
   | [Impacted Files](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree) | Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | [...c/main/java/org/apache/hudi/table/HoodieTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllVGFibGUuamF2YQ==) | `79.64% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...g/apache/hudi/table/action/BaseActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL0Jhc2VBY3Rpb25FeGVjdXRvci5qYXZh) | `100.00% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...ltacommit/BulkInsertDeltaCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2RlbHRhY29tbWl0L0J1bGtJbnNlcnREZWx0YUNvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...it/BulkInsertPreppedDeltaCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2RlbHRhY29tbWl0L0J1bGtJbnNlcnRQcmVwcGVkRGVsdGFDb21taXRBY3Rpb25FeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...commit/InsertPreppedDeltaCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2RlbHRhY29tbWl0L0luc2VydFByZXBwZWREZWx0YUNvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...commit/UpsertPreppedDeltaCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2RlbHRhY29tbWl0L1Vwc2VydFByZXBwZWREZWx0YUNvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../org/apache/hudi/table/HoodieMergeOnReadTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllTWVyZ2VPblJlYWRUYWJsZS5qYXZh) | `73.33% <22.22%> (-9.80%)` | `0.00 <0.00> (ø)` | |
   | [.../action/commit/BulkInsertCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9CdWxrSW5zZXJ0Q29tbWl0QWN0aW9uRXhlY3V0b3IuamF2YQ==) | `55.55% <55.55%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../commit/BulkInsertPreppedCommitActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9CdWxrSW5zZXJ0UHJlcHBlZENvbW1pdEFjdGlvbkV4ZWN1dG9yLmphdmE=) | `55.55% <55.55%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../apache/hudi/table/action/commit/DeleteHelper.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9EZWxldGVIZWxwZXIuamF2YQ==) | `64.28% <64.28%> (ø)` | `0.00 <0.00> (?)` | |
   | ... and [60 more](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree-more) | |
   
   ------
   
   [Continue to review full report at Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=continue).
   > **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
   > Powered by [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=footer). Last update [c0f96e0...8a3b612](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=lastupdated). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-612693953
 
 
   @vinothchandar : Addressed review comments. Please take a look when you get a chance.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407320535
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/DedupeWriteHelper.java
 ##########
 @@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.exception.HoodieUpsertException;
+import org.apache.hudi.index.HoodieIndex;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import java.time.Duration;
+import java.time.Instant;
+import scala.Tuple2;
+
+public class DedupeWriteHelper<T extends HoodieRecordPayload<T>> {
 
 Review comment:
   Fixed.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407272044
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/DedupeWriteHelper.java
 ##########
 @@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.exception.HoodieUpsertException;
+import org.apache.hudi.index.HoodieIndex;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import java.time.Duration;
+import java.time.Instant;
+import scala.Tuple2;
+
+public class DedupeWriteHelper<T extends HoodieRecordPayload<T>> {
 
 Review comment:
   This name is very misleading.. Just `WriteHelper` is even better.. this also does all the indexing per se.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252820
 
 

 ##########
 File path: hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java
 ##########
 @@ -69,4 +71,8 @@ public static WriteOperationType fromValue(String value) {
         throw new HoodieException("Invalid value of Type.");
     }
   }
+
+  public boolean isUpsert() {
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar merged pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar merged pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495
 
 
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405956442
 
 

 ##########
 File path: hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java
 ##########
 @@ -27,23 +27,25 @@
  */
 public enum WriteOperationType {
   // directly insert
-  INSERT("insert"),
-  INSERT_PREPPED("insert_prepped"),
+  INSERT("insert", false),
+  INSERT_PREPPED("insert_prepped", false),
   // update and insert
-  UPSERT("upsert"),
-  UPSERT_PREPPED("upsert_prepped"),
+  UPSERT("upsert", true),
+  UPSERT_PREPPED("upsert_prepped", true),
   // bulk insert
-  BULK_INSERT("bulk_insert"),
-  BULK_INSERT_PREPPED("bulk_insert_prepped"),
+  BULK_INSERT("bulk_insert", false),
+  BULK_INSERT_PREPPED("bulk_insert_prepped", false),
   // delete
-  DELETE("delete"),
+  DELETE("delete", true),
   // used for old version
-  UNKNOWN("unknown");
+  UNKNOWN("unknown", false);
 
   private final String value;
+  private final boolean isUpsert;
 
-  WriteOperationType(String value) {
+  WriteOperationType(String value, boolean isUpsert) {
 
 Review comment:
   is nt this really debt? can we get rid of this boolean and just replace with a helper method that takes in the operation type and returns true/false?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405953421
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
 ##########
 @@ -455,49 +454,15 @@ private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, Hood
   }
 
   private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String instantTime,
-      HoodieTable<T> hoodieTable, final boolean isUpsert) {
-
-    // Cache the tagged records, so we don't end up computing both
-    // TODO: Consistent contract in HoodieWriteClient regarding preppedRecord storage level handling
-    if (preppedRecords.getStorageLevel() == StorageLevel.NONE()) {
-      preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER());
-    } else {
-      LOG.info("RDD PreppedRecords was persisted at: " + preppedRecords.getStorageLevel());
-    }
-
-    WorkloadProfile profile = null;
-    if (hoodieTable.isWorkloadProfileNeeded()) {
-      profile = new WorkloadProfile(preppedRecords);
-      LOG.info("Workload profile :" + profile);
-      saveWorkloadProfileMetadataToInflight(profile, hoodieTable, instantTime);
-    }
-
-    // partition using the insert partitioner
-    final Partitioner partitioner = getPartitioner(hoodieTable, isUpsert, profile);
-    JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
-    JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
-      if (isUpsert) {
-        return hoodieTable.handleUpsertPartition(instantTime, partition, recordItr, partitioner);
-      } else {
-        return hoodieTable.handleInsertPartition(instantTime, partition, recordItr, partitioner);
-      }
-    }, true).flatMap(List::iterator);
-
-    return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, instantTime);
-  }
-
-  private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
-    if (isUpsert) {
-      return table.getUpsertPartitioner(profile, jsc);
-    } else {
-      return table.getInsertPartitioner(profile, jsc);
+      HoodieTable<T> hoodieTable) {
+    CommitActionResult result = hoodieTable.ingest(jsc, preppedRecords, instantTime, getOperationType());
 
 Review comment:
   IMO we should just mirror the same APIs upsert, insert on the table.. `ingest` is confusing, since it also implies that we are reading from some source, whihc we are not..  

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405954061
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
 ##########
 @@ -165,6 +165,19 @@ private boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
       activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime),
           Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
 
+      doPostCommitAndEmitCommitMetrics(instantTime, metadata, extraMetadata, actionType);
+
+      LOG.info("Committed " + instantTime);
+    } catch (IOException e) {
+      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime,
+          e);
+    }
+    return true;
+  }
+
+  void doPostCommitAndEmitCommitMetrics(String instantTime, HoodieCommitMetadata metadata,
 
 Review comment:
   break this into two methods?  one to do the post commit and one to emit?  

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407320563
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeOnReadDeleteExecutor.java
 ##########
 @@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+public class MergeOnReadDeleteExecutor<T extends HoodieRecordPayload<T>>
 
 Review comment:
   Agree. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407272746
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeOnReadDeleteExecutor.java
 ##########
 @@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+public class MergeOnReadDeleteExecutor<T extends HoodieRecordPayload<T>>
 
 Review comment:
   consistent naming of classes ending with `ActionExecutor` ? In fact, I wonder if it should be `DeleteDeltaCommitActionExecutor` .. (same for other classes)... That clearly shows that this is a Delta commit in the scope of deletes.. 
   
    Similarly `DeleteCommitActionExecutor` and so on for cow classes.. They are now shorter and IMO more precise. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405957239
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionResult.java
 ##########
 @@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import java.util.List;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.spark.api.java.JavaRDD;
+
+import java.time.Duration;
+
+/**
+ * Contains metadata, write-statuses and latency times corresponding to a commit/delta-commit action.
+ */
+public class CommitActionResult {
 
 Review comment:
   Ideally we are able to return `HoodieCommitMetadata`... but since we can't, may be still rename this to `HoodieWriteMetadata` to stay consistent with the other objects we are returning now. (RollbackMetadata, RestoreMetadata etc)? 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405956603
 
 

 ##########
 File path: hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java
 ##########
 @@ -69,4 +71,8 @@ public static WriteOperationType fromValue(String value) {
         throw new HoodieException("Invalid value of Type.");
     }
   }
+
+  public boolean isUpsert() {
 
 Review comment:
   Like this method, can be just a static method that returns true/false

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407272307
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeOnReadBulkInsertExecutor.java
 ##########
 @@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
 
 Review comment:
   anything that will produce a `deltacommit` needs to be in a `deltacommit` package instead?  We should avoid overloading commit loosely at this level.. ? wdyt?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405955610
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionResult.java
 ##########
 @@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import java.util.List;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.spark.api.java.JavaRDD;
+
+import java.time.Duration;
+
+/**
+ * Contains metadata, write-statuses and latency times corresponding to a commit/delta-commit action.
+ */
+public class CommitActionResult {
+
+  private JavaRDD<WriteStatus> writeStatuses;
+  private Duration indexUpdateDuration;
 
 Review comment:
   if tagging the index is outside this action, then updating should be moved out as well?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407273467
 
 

 ##########
 File path: hudi-common/src/main/java/org/apache/hudi/common/model/WriteOperationType.java
 ##########
 @@ -69,4 +69,8 @@ public static WriteOperationType fromValue(String value) {
         throw new HoodieException("Invalid value of Type.");
     }
   }
+
+  public static boolean isUpsert(WriteOperationType operationType) {
 
 Review comment:
   rename `isChangingRecords()`  or `isChangeOperation()` ? 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
vinothchandar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r405954917
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/AbstractBaseCommitActionExecutor.java
 ##########
 @@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.client.SparkTaskContextSupplier;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.client.utils.SparkConfigUtils;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.timeline.HoodieInstant.State;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieCommitException;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.exception.HoodieUpsertException;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.WorkloadProfile;
+import org.apache.hudi.table.WorkloadStat;
+import org.apache.hudi.table.action.BaseActionExecutor;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.Partitioner;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.storage.StorageLevel;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import scala.Tuple2;
+
+public abstract class AbstractBaseCommitActionExecutor<T extends HoodieRecordPayload<T>>
 
 Review comment:
   rename to just `BaseCommitActionExecutor`?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252838
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
 ##########
 @@ -455,49 +454,15 @@ private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, Hood
   }
 
   private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String instantTime,
-      HoodieTable<T> hoodieTable, final boolean isUpsert) {
-
-    // Cache the tagged records, so we don't end up computing both
-    // TODO: Consistent contract in HoodieWriteClient regarding preppedRecord storage level handling
-    if (preppedRecords.getStorageLevel() == StorageLevel.NONE()) {
-      preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER());
-    } else {
-      LOG.info("RDD PreppedRecords was persisted at: " + preppedRecords.getStorageLevel());
-    }
-
-    WorkloadProfile profile = null;
-    if (hoodieTable.isWorkloadProfileNeeded()) {
-      profile = new WorkloadProfile(preppedRecords);
-      LOG.info("Workload profile :" + profile);
-      saveWorkloadProfileMetadataToInflight(profile, hoodieTable, instantTime);
-    }
-
-    // partition using the insert partitioner
-    final Partitioner partitioner = getPartitioner(hoodieTable, isUpsert, profile);
-    JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
-    JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
-      if (isUpsert) {
-        return hoodieTable.handleUpsertPartition(instantTime, partition, recordItr, partitioner);
-      } else {
-        return hoodieTable.handleInsertPartition(instantTime, partition, recordItr, partitioner);
-      }
-    }, true).flatMap(List::iterator);
-
-    return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, instantTime);
-  }
-
-  private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
-    if (isUpsert) {
-      return table.getUpsertPartitioner(profile, jsc);
-    } else {
-      return table.getInsertPartitioner(profile, jsc);
+      HoodieTable<T> hoodieTable) {
+    CommitActionResult result = hoodieTable.ingest(jsc, preppedRecords, instantTime, getOperationType());
 
 Review comment:
   Yes, Supporting externally triggered commit is the reason why we have CommitActionResult.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407320544
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeOnReadBulkInsertExecutor.java
 ##########
 @@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
 
 Review comment:
   Makes sense. Done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
codecov-io edited a comment on issue #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#issuecomment-610761048
 
 
   # [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=h1) Report
   > Merging [#1495](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=desc) into [master](https://codecov.io/gh/apache/incubator-hudi/commit/c0f96e072650d39433929c6efe3bc0b2cd882a39&el=desc) will **decrease** coverage by `0.02%`.
   > The diff coverage is `81.48%`.
   
   [![Impacted file tree graph](https://codecov.io/gh/apache/incubator-hudi/pull/1495/graphs/tree.svg?width=650&height=150&src=pr&token=VTTXabwbs2)](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree)
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #1495      +/-   ##
   ============================================
   - Coverage     72.25%   72.22%   -0.03%     
     Complexity      289      289              
   ============================================
     Files           338      365      +27     
     Lines         15946    16225     +279     
     Branches       1624     1629       +5     
   ============================================
   + Hits          11521    11719     +198     
   - Misses         3697     3774      +77     
   - Partials        728      732       +4     
   ```
   
   
   | [Impacted Files](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=tree) | Coverage Δ | Complexity Δ | |
   |---|---|---|---|
   | [...c/main/java/org/apache/hudi/table/HoodieTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllVGFibGUuamF2YQ==) | `79.64% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...g/apache/hudi/table/action/BaseActionExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL0Jhc2VBY3Rpb25FeGVjdXRvci5qYXZh) | `100.00% <ø> (ø)` | `0.00 <0.00> (ø)` | |
   | [...e/action/commit/MergeOnReadBulkInsertExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZEJ1bGtJbnNlcnRFeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...n/commit/MergeOnReadBulkInsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZEJ1bGtJbnNlcnRQcmVwcGVkRXhlY3V0b3IuamF2YQ==) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...ction/commit/MergeOnReadInsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZEluc2VydFByZXBwZWRFeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [...ction/commit/MergeOnReadUpsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9NZXJnZU9uUmVhZFVwc2VydFByZXBwZWRFeGVjdXRvci5qYXZh) | `0.00% <0.00%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../org/apache/hudi/table/HoodieMergeOnReadTable.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvSG9vZGllTWVyZ2VPblJlYWRUYWJsZS5qYXZh) | `73.33% <22.22%> (-9.80%)` | `0.00 <0.00> (ø)` | |
   | [...e/action/commit/CopyOnWriteBulkInsertExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9Db3B5T25Xcml0ZUJ1bGtJbnNlcnRFeGVjdXRvci5qYXZh) | `55.55% <55.55%> (ø)` | `0.00 <0.00> (?)` | |
   | [...n/commit/CopyOnWriteBulkInsertPreppedExecutor.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9Db3B5T25Xcml0ZUJ1bGtJbnNlcnRQcmVwcGVkRXhlY3V0b3IuamF2YQ==) | `55.55% <55.55%> (ø)` | `0.00 <0.00> (?)` | |
   | [.../apache/hudi/table/action/commit/DeleteHelper.java](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree#diff-aHVkaS1jbGllbnQvc3JjL21haW4vamF2YS9vcmcvYXBhY2hlL2h1ZGkvdGFibGUvYWN0aW9uL2NvbW1pdC9EZWxldGVIZWxwZXIuamF2YQ==) | `64.28% <64.28%> (ø)` | `0.00 <0.00> (?)` | |
   | ... and [57 more](https://codecov.io/gh/apache/incubator-hudi/pull/1495/diff?src=pr&el=tree-more) | |
   
   ------
   
   [Continue to review full report at Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=continue).
   > **Legend** - [Click here to learn more](https://docs.codecov.io/docs/codecov-delta)
   > `Δ = absolute <relative> (impact)`, `ø = not affected`, `? = missing data`
   > Powered by [Codecov](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=footer). Last update [c0f96e0...ce8cc69](https://codecov.io/gh/apache/incubator-hudi/pull/1495?src=pr&el=lastupdated). Read the [comment docs](https://docs.codecov.io/docs/pull-request-comments).
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407320551
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/MergeOnReadCommitActionExecutor.java
 ##########
 @@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.exception.HoodieUpsertException;
+import org.apache.hudi.execution.MergeOnReadLazyInsertIterable;
+import org.apache.hudi.io.HoodieAppendHandle;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.WorkloadProfile;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.Partitioner;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+public abstract class MergeOnReadCommitActionExecutor<T extends HoodieRecordPayload<T>>
 
 Review comment:
   Removed MergeOnRead. Changed to DeltaCommitActionExecutor

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407267696
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionResult.java
 ##########
 @@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.commit;
+
+import java.util.List;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.model.HoodieWriteStat;
+import org.apache.hudi.common.util.Option;
+
+import org.apache.spark.api.java.JavaRDD;
+
+import java.time.Duration;
+
+/**
+ * Contains metadata, write-statuses and latency times corresponding to a commit/delta-commit action.
+ */
+public class CommitActionResult {
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252843
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
 ##########
 @@ -165,6 +165,19 @@ private boolean commit(String instantTime, JavaRDD<WriteStatus> writeStatuses,
       activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime),
           Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
 
+      doPostCommitAndEmitCommitMetrics(instantTime, metadata, extraMetadata, actionType);
+
+      LOG.info("Committed " + instantTime);
+    } catch (IOException e) {
+      throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime,
+          e);
+    }
+    return true;
+  }
+
+  void doPostCommitAndEmitCommitMetrics(String instantTime, HoodieCommitMetadata metadata,
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-hudi] bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package

Posted by GitBox <gi...@apache.org>.
bvaradar commented on a change in pull request #1495: [HUDI-770] Organize upsert/insert API implementation under a single package
URL: https://github.com/apache/incubator-hudi/pull/1495#discussion_r407252846
 
 

 ##########
 File path: hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
 ##########
 @@ -455,49 +454,15 @@ private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, Hood
   }
 
   private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords, String instantTime,
-      HoodieTable<T> hoodieTable, final boolean isUpsert) {
-
-    // Cache the tagged records, so we don't end up computing both
-    // TODO: Consistent contract in HoodieWriteClient regarding preppedRecord storage level handling
-    if (preppedRecords.getStorageLevel() == StorageLevel.NONE()) {
-      preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER());
-    } else {
-      LOG.info("RDD PreppedRecords was persisted at: " + preppedRecords.getStorageLevel());
-    }
-
-    WorkloadProfile profile = null;
-    if (hoodieTable.isWorkloadProfileNeeded()) {
-      profile = new WorkloadProfile(preppedRecords);
-      LOG.info("Workload profile :" + profile);
-      saveWorkloadProfileMetadataToInflight(profile, hoodieTable, instantTime);
-    }
-
-    // partition using the insert partitioner
-    final Partitioner partitioner = getPartitioner(hoodieTable, isUpsert, profile);
-    JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
-    JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> {
-      if (isUpsert) {
-        return hoodieTable.handleUpsertPartition(instantTime, partition, recordItr, partitioner);
-      } else {
-        return hoodieTable.handleInsertPartition(instantTime, partition, recordItr, partitioner);
-      }
-    }, true).flatMap(List::iterator);
-
-    return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, instantTime);
-  }
-
-  private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
-    if (isUpsert) {
-      return table.getUpsertPartitioner(profile, jsc);
-    } else {
-      return table.getInsertPartitioner(profile, jsc);
+      HoodieTable<T> hoodieTable) {
+    CommitActionResult result = hoodieTable.ingest(jsc, preppedRecords, instantTime, getOperationType());
 
 Review comment:
   Done

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services