Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/03/24 16:03:09 UTC

[GitHub] [hudi] alexeykudinkin commented on a change in pull request #5117: [HUDI-3689] Fix glob path and hive sync in deltastreamer tests

alexeykudinkin commented on a change in pull request #5117:
URL: https://github.com/apache/hudi/pull/5117#discussion_r834475296



##########
File path: hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
##########
@@ -1168,54 +1170,54 @@ public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline() t
     String tableBasePath = dfsBasePath + "/test_table2";
     String downstreamTableBasePath = dfsBasePath + "/test_downstream_table2";
 
-    HiveSyncConfig hiveSyncConfig = getHiveSyncConfig(tableBasePath, "hive_trips");
-
     // Initial bulk insert to ingest to first hudi table
     HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT,
         Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, true);
+    // NOTE: We should not need to set the config below; 'datestr' should have assumed date partitioning
+    cfg.configs.add("hoodie.datasource.hive_sync.partition_fields=year,month,day");
     new HoodieDeltaStreamer(cfg, jsc, dfs, hiveServer.getHiveConf()).sync();
-    TestHelpers.assertRecordCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
-    TestHelpers.assertDistanceCount(1000, tableBasePath + "/*/*.parquet", sqlContext);
-    TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath + "/*/*.parquet", sqlContext);
+    TestHelpers.assertRecordCount(1000, tableBasePath, sqlContext);
+    TestHelpers.assertDistanceCount(1000, tableBasePath, sqlContext);
+    TestHelpers.assertDistanceCountWithExactValue(1000, tableBasePath, sqlContext);
     String lastInstantForUpstreamTable = TestHelpers.assertCommitMetadata("00000", tableBasePath, dfs, 1);
 
     // Now incrementally pull from the above hudi table and ingest to second table
     HoodieDeltaStreamer.Config downstreamCfg =
         TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath, WriteOperationType.BULK_INSERT,
             true, null);
     new HoodieDeltaStreamer(downstreamCfg, jsc, dfs, hiveServer.getHiveConf()).sync();
-    TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
-    TestHelpers.assertDistanceCount(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
-    TestHelpers.assertDistanceCountWithExactValue(1000, downstreamTableBasePath + "/*/*.parquet", sqlContext);
+    TestHelpers.assertRecordCount(1000, downstreamTableBasePath, sqlContext);

Review comment:
       @nsivabalan let's create a ticket to follow up on the globbing issue

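       For context, a minimal sketch of the read-path difference behind the globbing follow-up, assuming Spark's DataFrameReader API and the "hudi" datasource format; the class and method names below are hypothetical and not the actual TestHelpers code.

           import org.apache.spark.sql.Dataset;
           import org.apache.spark.sql.Row;
           import org.apache.spark.sql.SQLContext;

           final class GlobVsBasePathSketch {

             // Old style: the assertion helpers were handed a glob over the partition
             // directories, so the count depends on glob expansion finding parquet files
             // exactly two levels below the base path.
             static long countViaGlob(SQLContext sqlContext, String tableBasePath) {
               Dataset<Row> df = sqlContext.read().format("hudi")
                   .load(tableBasePath + "/*/*.parquet");
               return df.count();
             }

             // New style (this PR): hand Hudi the table base path and let its own
             // file listing resolve the partitions.
             static long countViaBasePath(SQLContext sqlContext, String tableBasePath) {
               Dataset<Row> df = sqlContext.read().format("hudi")
                   .load(tableBasePath);
               return df.count();
             }
           }

       The glob only matches files two directory levels down, while the base-path form lets Hudi decide what to read; that gap is what the proposed follow-up ticket would cover.
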
##########
File path: hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
##########
@@ -1460,7 +1464,7 @@ private void testORCDFSSource(boolean useSchemaProvider, List<String> transforme
     orcProps.setProperty("include", "base.properties");
     orcProps.setProperty("hoodie.embed.timeline.server", "false");
     orcProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
-    orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
+    orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path");

Review comment:
       @codope @nsivabalan aren't we changing the intent of the test here?

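       For reference, a minimal sketch of the two configurations being compared, using only java.util.Properties; the wrapper class and method below are hypothetical and not part of TestHoodieDeltaStreamer.

           import java.util.Properties;

           final class OrcSourcePropsSketch {

             // Builds the source properties under discussion. The property keys come
             // from the quoted diff; everything else here is illustrative.
             static Properties orcSourceProps(boolean useMissingPartitionField) {
               Properties orcProps = new Properties();
               orcProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
               // "not_there" points at a field absent from the ORC schema, presumably to
               // exercise the missing-partition-path behavior; "partition_path" is a real
               // column, so records get partitioned normally, which is the change the
               // comment above questions.
               orcProps.setProperty("hoodie.datasource.write.partitionpath.field",
                   useMissingPartitionField ? "not_there" : "partition_path");
               return orcProps;
             }
           }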



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org