You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2022/07/11 06:37:11 UTC

[GitHub] [hudi] boneanxs commented on a diff in pull request #5722: [HUDI-4170] Make user can use hoodie.datasource.read.paths to read necessary files

boneanxs commented on code in PR #5722:
URL: https://github.com/apache/hudi/pull/5722#discussion_r917585253


##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala:
##########
@@ -104,24 +104,8 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext,
       val fileSlices = fileIndex.listFileSlices(convertedPartitionFilters)
       buildSplits(fileSlices.values.flatten.toSeq)
     } else {
-      val inMemoryFileIndex = HoodieInMemoryFileIndex.create(sparkSession, globPaths)
-      val partitionDirs = inMemoryFileIndex.listFiles(partitionFilters, dataFilters)
-
-      val fsView = new HoodieTableFileSystemView(metaClient, timeline, partitionDirs.flatMap(_.files).toArray)
-      val partitionPaths = fsView.getPartitionPaths.asScala
-
-      if (partitionPaths.isEmpty || latestInstant.isEmpty) {
-        // If this an empty table OR it has no completed commits yet, return
-        List.empty[HoodieMergeOnReadFileSplit]
-      } else {
-        val queryTimestamp = this.queryTimestamp.get
-
-        val fileSlices = partitionPaths.flatMap { partitionPath =>
-          val relativePath = getRelativePartitionPath(new Path(basePath), partitionPath)
-          fsView.getLatestMergedFileSlicesBeforeOrOn(relativePath, queryTimestamp).iterator().asScala.toSeq
-        }
-        buildSplits(fileSlices)
-      }
+      val fileSlices = listLatestFileSlices(globPaths, partitionFilters, dataFilters)

Review Comment:
   Hi @alexeykudinkin, could you please review this PR soon? It is blocking another PR: https://github.com/apache/hudi/pull/6046. Thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org