You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@gobblin.apache.org by GitBox <gi...@apache.org> on 2022/07/20 01:43:51 UTC

[GitHub] [gobblin] ZihanLi58 commented on a diff in pull request #3528: [GOBBLIN-1669] Clean up TimeAwareRecursiveCopyableDataset to support seconds in time…

ZihanLi58 commented on code in PR #3528:
URL: https://github.com/apache/gobblin/pull/3528#discussion_r925039376


##########
gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java:
##########
@@ -128,55 +124,83 @@ public void remove() {
     }
   }
 
-  private boolean isDatePatternHourly(String datePattern) {
-    DateTimeFormatter formatter = DateTimeFormat.forPattern(datePattern);
-    LocalDateTime refDateTime = new LocalDateTime(2017, 01, 01, 10, 0, 0);
-    String refDateTimeString = refDateTime.toString(formatter);
-    LocalDateTime refDateTimeAtStartOfDay = refDateTime.withHourOfDay(0);
-    String refDateTimeStringAtStartOfDay = refDateTimeAtStartOfDay.toString(formatter);
-    return !refDateTimeString.equals(refDateTimeStringAtStartOfDay);
-  }
-
-  private boolean isDatePatternMinutely(String datePattern) {
+  private DatePattern validateLookbackWithDatePatternFormat(String datePattern, String lookbackTime) {
     DateTimeFormatter formatter = DateTimeFormat.forPattern(datePattern);
-    LocalDateTime refDateTime = new LocalDateTime(2017, 01, 01, 10, 59, 0);
+    LocalDateTime refDateTime = new LocalDateTime(2017, 01, 31, 10, 59, 59);
     String refDateTimeString = refDateTime.toString(formatter);
-    LocalDateTime refDateTimeAtStartOfHour = refDateTime.withMinuteOfHour(0);
-    String refDateTimeStringAtStartOfHour = refDateTimeAtStartOfHour.toString(formatter);
-    return !refDateTimeString.equals(refDateTimeStringAtStartOfHour);
-  }
+    PeriodFormatterBuilder formatterBuilder;
 
-  private boolean isLookbackTimeStringDaily(String lookbackTime) {
-    PeriodFormatter periodFormatter = new PeriodFormatterBuilder().appendDays().appendSuffix("d").toFormatter();
-    try {
-      periodFormatter.parsePeriod(lookbackTime);
-      return true;
-    } catch (Exception e) {
-      return false;
+    // Validate that the lookback is supported for the time format
+    if (!refDateTime.withSecondOfMinute(0).toString(formatter).equals(refDateTimeString)) {
+      formatterBuilder = new PeriodFormatterBuilder().appendDays().appendSuffix("d").appendHours().appendSuffix("h").appendMinutes().appendSuffix("m").appendSeconds().appendSuffix("s");
+      if (!lookbackTimeMatchesFormat(formatterBuilder, lookbackTime)) {
+        throw new IllegalArgumentException(String.format("Expected lookback time to be in daily or hourly or minutely or secondly format, check %s", LOOKBACK_TIME_KEY));
+      }
+      return DatePattern.SECONDLY;
+    } else if (!refDateTime.withMinuteOfHour(0).toString(formatter).equals(refDateTimeString)) {
+      formatterBuilder = new PeriodFormatterBuilder().appendDays().appendSuffix("d").appendHours().appendSuffix("h").appendMinutes().appendSuffix("m");
+      if (!lookbackTimeMatchesFormat(formatterBuilder, lookbackTime)) {
+        throw new IllegalArgumentException(String.format("Expected lookback time to be in daily or hourly or minutely format, check %s", LOOKBACK_TIME_KEY));
+      }
+      return DatePattern.MINUTELY;
+    } else if (!refDateTime.withHourOfDay(0).toString(formatter).equals(refDateTimeString)) {
+      formatterBuilder = new PeriodFormatterBuilder().appendDays().appendSuffix("d").appendHours().appendSuffix("h");
+      if (!lookbackTimeMatchesFormat(formatterBuilder, lookbackTime)) {
+        throw new IllegalArgumentException(String.format("Expected lookback time to be in daily or hourly format, check %s", LOOKBACK_TIME_KEY));
+      }
+      return DatePattern.HOURLY;
+    } else if (!refDateTime.withDayOfMonth(1).toString(formatter).equals(refDateTimeString) ) {
+      formatterBuilder = new PeriodFormatterBuilder().appendDays().appendSuffix("d");
+      if (!lookbackTimeMatchesFormat(formatterBuilder, lookbackTime)) {
+        throw new IllegalArgumentException(String.format("Expected lookback time to be in daily format, check %s", LOOKBACK_TIME_KEY));
+      }
+      return DatePattern.DAILY;
     }
+    return null;
   }
 
-  private boolean isLookbackTimeStringHourly(String lookbackTime) {
-    PeriodFormatter periodFormatter = new PeriodFormatterBuilder().appendDays().appendSuffix("d").appendHours().appendSuffix("h").toFormatter();
+  private boolean lookbackTimeMatchesFormat(PeriodFormatterBuilder formatterBuilder, String lookbackTime) {
     try {
-      periodFormatter.parsePeriod(lookbackTime);
-      return true;
-    } catch (Exception e) {
+      formatterBuilder.toFormatter().parsePeriod(lookbackTime);
+    } catch (IllegalArgumentException e) {
       return false;
     }
+    return true;
   }
 
   @Override
   protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter) throws IOException {
-    DateTimeFormatter formatter = DateTimeFormat.forPattern(datePattern);
+    DateTimeFormatter formatter = DateTimeFormat.forPattern(this.datePattern);
     LocalDateTime endDate = currentTime;
     LocalDateTime startDate = endDate.minus(this.lookbackPeriod);
-
-    DateRangeIterator dateRangeIterator = new DateRangeIterator(startDate, endDate, pattern);
     List<FileStatus> fileStatuses = Lists.newArrayList();
-    while (dateRangeIterator.hasNext()) {
-      Path pathWithDateTime = new Path(path, dateRangeIterator.next().toString(formatter));
-      fileStatuses.addAll(super.getFilesAtPath(fs, pathWithDateTime, fileFilter));
+
+    // Data inside of nested folders representing timestamps need to be fetched differently
+    if (datePattern.contains(FileSystems.getDefault().getSeparator())) {
+      // Use an iterator that traverses through all times from lookback to current time, based on format
+      DateRangeIterator dateRangeIterator = new DateRangeIterator(startDate, endDate, this.patternQualifier);
+      while (dateRangeIterator.hasNext()) {
+        Path pathWithDateTime = new Path(path, dateRangeIterator.next().toString(formatter));
+        if (!fs.exists(pathWithDateTime)) {
+          continue;
+        }
+        fileStatuses.addAll(super.getFilesAtPath(fs, pathWithDateTime, fileFilter));
+      }
+    } else {
+      // Look at the top level directories and compare if those fit into the date format
+      Iterator<FileStatus> folderIterator = Arrays.asList(fs.listStatus(path)).iterator();
+      while (folderIterator.hasNext()) {
+        Path folderPath = folderIterator.next().getPath();
+        String datePath = folderPath.getName().split("/")[folderPath.getName().split("/").length -1];

Review Comment:
   What's this used for? isn't Path.getName() directly return you the name of current folder?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@gobblin.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org