You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2022/06/02 18:22:40 UTC

[pinot] branch master updated: Change DateTimeFormatSpec delimiter (#8779)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a311428bc Change DateTimeFormatSpec delimiter (#8779)
7a311428bc is described below

commit 7a311428bcba238514e8206a6c17a6b7bed8db14
Author: Satyam Raj <sa...@gmail.com>
AuthorDate: Thu Jun 2 23:52:34 2022 +0530

    Change DateTimeFormatSpec delimiter (#8779)
    
    Support for new DateTime format:
    - EPOCH|<timeUnit>(|<size>)
    - SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)
    - TIMESTAMP
---
 .../pinot/common/data/DateTimeFormatSpecTest.java  | 126 +++++++++++++++++++++
 .../pinot/spi/data/DateTimeFormatPatternSpec.java  |  14 +++
 .../apache/pinot/spi/data/DateTimeFormatSpec.java  | 102 ++++++++++++++---
 3 files changed, 227 insertions(+), 15 deletions(-)

diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
index 22c359bcfa..5f33b0227a 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
@@ -75,6 +75,36 @@ public class DateTimeFormatSpecTest {
     });
     entries.add(new Object[]{"1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH Z", "20170701 00 -07:00", 1498892400000L});
     entries.add(new Object[]{"1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a", "8/7/2017 12:45:50 AM", 1502066750000L});
+    entries.add(new Object[]{"EPOCH|HOURS|1", "416359", 1498892400000L});
+    entries.add(new Object[]{"EPOCH|HOURS", "416359", 1498892400000L});
+    entries.add(new Object[]{"EPOCH|MILLISECONDS|1", "1498892400000", 1498892400000L});
+    entries.add(new Object[]{"EPOCH|MILLISECONDS", "1498892400000", 1498892400000L});
+    entries.add(new Object[]{"EPOCH|HOURS|1", "0", 0L});
+    entries.add(new Object[]{"EPOCH|HOURS", "0", 0L});
+    entries.add(new Object[]{"EPOCH|MINUTES|5", "4996308", 1498892400000L});
+    entries.add(new Object[]{
+        "TIMESTAMP", "2017-07-01 00:00:00", Timestamp.valueOf("2017-07-01 00:00:00").getTime()
+    });
+    entries.add(new Object[]{"TIMESTAMP", "1498892400000", 1498892400000L});
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd", "20170701",
+        DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC().parseMillis("20170701")
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd|America/Chicago", "20170701", DateTimeFormat.forPattern("yyyyMMdd")
+        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Chicago"))).parseMillis("20170701")
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH", "20170701 00",
+        DateTimeFormat.forPattern("yyyyMMdd HH").withZoneUTC().parseMillis("20170701 00")
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH|GMT+0600", "20170701 00", DateTimeFormat.forPattern("yyyyMMdd HH")
+        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("GMT+0600"))).parseMillis("20170701 00")
+    });
+    entries.add(new Object[]{"SIMPLE_DATE_FORMAT|yyyyMMdd HH Z", "20170701 00 -07:00", 1498892400000L});
+    entries.add(new Object[]{"SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", "8/7/2017 12:45:50 AM", 1502066750000L});
+
     return entries.toArray(new Object[entries.size()][]);
   }
 
@@ -129,6 +159,47 @@ public class DateTimeFormatSpecTest {
         "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h a", 1502066750000L,
         DateTimeFormat.forPattern("M/d/yyyy h a").withZoneUTC().withLocale(Locale.ENGLISH).print(1502066750000L)
     });
+    entries.add(new Object[]{"EPOCH|HOURS|1", 1498892400000L, "416359"});
+    entries.add(new Object[]{"EPOCH|MILLISECONDS|1", 1498892400000L, "1498892400000"});
+    entries.add(new Object[]{"EPOCH|HOURS|1", 0L, "0"});
+    entries.add(new Object[]{"EPOCH|MINUTES|5", 1498892400000L, "4996308"});
+    entries.add(new Object[]{
+        "TIMESTAMP", Timestamp.valueOf("2017-07-01 00:00:00").getTime(), "2017-07-01 00:00:00.0"
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd", 1498892400000L,
+        DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC().print(1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd|America/New_York", 1498892400000L, DateTimeFormat.forPattern("yyyyMMdd")
+        .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/New_York"))).print(1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1498892400000L,
+        DateTimeFormat.forPattern("yyyyMMdd HH").withZoneUTC().print(1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH|IST", 1498892400000L,
+        DateTimeFormat.forPattern("yyyyMMdd HH").withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))).print(
+            1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH Z", 1498892400000L,
+        DateTimeFormat.forPattern("yyyyMMdd HH Z").withZoneUTC().print(1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH Z|GMT+0500", 1498892400000L,
+        DateTimeFormat.forPattern("yyyyMMdd HH Z")
+            .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("GMT+0500"))).print(1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1498892400000L,
+        DateTimeFormat.forPattern("M/d/yyyy h:mm:ss a").withZoneUTC().withLocale(Locale.ENGLISH).print(1498892400000L)
+    });
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|M/d/yyyy h a", 1502066750000L,
+        DateTimeFormat.forPattern("M/d/yyyy h a").withZoneUTC().withLocale(Locale.ENGLISH).print(1502066750000L)
+    });
     return entries.toArray(new Object[entries.size()][]);
   }
 
@@ -219,6 +290,61 @@ public class DateTimeFormatSpecTest {
         DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a",
         DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo"))
     });
+
+    //test new format
+    entries.add(
+        new Object[]{"TIMESTAMP", 1, TimeUnit.MILLISECONDS, TimeFormat.TIMESTAMP, null,
+            DateTimeZone.UTC});
+
+    entries.add(
+        new Object[]{"EPOCH|HOURS|1", 1, TimeUnit.HOURS, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC});
+
+    entries.add(new Object[]{
+        "EPOCH|MINUTES|5", 5, TimeUnit.MINUTES, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+        "yyyyMMdd", DateTimeZone.UTC
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+        "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS,
+        DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
+        DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS,
+        DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
+        DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+        "yyyyMMdd HH", DateTimeZone.UTC
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|yyyyMMdd HH|dummy", 1, TimeUnit.DAYS,
+        DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1, TimeUnit.DAYS,
+        DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.UTC
+    });
+
+    entries.add(new Object[]{
+        "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|Asia/Tokyo", 1, TimeUnit.DAYS,
+        DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a",
+        DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo"))
+    });
     return entries.toArray(new Object[entries.size()][]);
   }
 
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
index 889202490e..f237270e2a 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
@@ -65,6 +65,20 @@ public class DateTimeFormatPatternSpec {
     }
   }
 
+  /** Pipe-format constructor; the pattern, time zone and formatter are only set up for SIMPLE_DATE_FORMAT. */
+  public DateTimeFormatPatternSpec(DateTimeFieldSpec.TimeFormat timeFormat, @Nullable String sdfPattern,
+      @Nullable String timeZone) {
+    _timeFormat = timeFormat;
+    if (!_timeFormat.equals(DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT)) {
+      return;
+    }
+    _sdfPattern = sdfPattern;
+    if (timeZone != null) {
+      _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone));
+    }
+    _dateTimeFormatter = DateTimeFormat.forPattern(sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
+  }
+
   public DateTimeFieldSpec.TimeFormat getTimeFormat() {
     return _timeFormat;
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
index c8083f9b1e..0a1de16214 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
@@ -37,6 +37,7 @@ public class DateTimeFormatSpec {
 
   public static final String NUMBER_REGEX = "[1-9][0-9]*";
   public static final String COLON_SEPARATOR = ":";
+  public static final String PIPE_SEPARATOR = "|";
 
   /* DateTimeFieldSpec format is of format size:timeUnit:timeformat:pattern tz(timezone)
    * tz(timezone) is optional. If not specified, UTC timezone is used */
@@ -47,6 +48,18 @@ public class DateTimeFormatSpec {
   public static final int MIN_FORMAT_TOKENS = 3;
   public static final int MAX_FORMAT_TOKENS = 4;
 
+  public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+  public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+  public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
+  // Applicable for SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)
+  public static final int SDF_PATTERN_POSITION = 1;
+  public static final int SDF_TIMEZONE_POSITION = 2;
+
+  // Applicable for EPOCH|<timeUnit>(|<size>)
+  public static final int EPOCH_UNIT_POSITION = 1;
+  public static final int EPOCH_SIZE_POSITION = 2;
+
   private final String _format;
   private final int _size;
   private final DateTimeFormatUnitSpec _unitSpec;
@@ -54,21 +67,51 @@ public class DateTimeFormatSpec {
 
   public DateTimeFormatSpec(String format) {
     _format = format;
-    validateFormat(format);
-    String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS);
-    if (formatTokens.length == MAX_FORMAT_TOKENS) {
-      _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
-          formatTokens[FORMAT_PATTERN_POSITION]);
+    if (Character.isDigit(_format.charAt(0))) {
+      String[] formatTokens = validateFormat(format);
+      if (formatTokens.length == MAX_FORMAT_TOKENS) {
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
+            formatTokens[FORMAT_PATTERN_POSITION]);
+      } else {
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
+      }
+      if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
+        // TIMESTAMP type stores millis since epoch
+        _size = 1;
+        _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+      } else {
+        _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
+        _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+      }
     } else {
-      _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
-    }
-    if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
-      // TIMESTAMP type stores millis since epoch
-      _size = 1;
-      _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
-    } else {
-      _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
-      _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+      String[] formatTokens = validatePipeFormat(format);
+      if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.EPOCH.toString())) {
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
+        _unitSpec = new DateTimeFormatUnitSpec(formatTokens[EPOCH_UNIT_POSITION]);
+        if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
+          _size = Integer.parseInt(formatTokens[EPOCH_SIZE_POSITION]);
+        } else {
+          _size = 1;
+        }
+      } else if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString())) {
+        if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
+          _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf(
+              formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]),
+              formatTokens[SDF_PATTERN_POSITION],
+              formatTokens[SDF_TIMEZONE_POSITION]);
+        } else {
+          _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf(
+              formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]),
+              formatTokens[SDF_PATTERN_POSITION], null);
+        }
+        _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.DAYS.toString());
+        _size = 1;
+      } else {
+        // Applicable for TIMESTAMP format
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
+        _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.MILLISECONDS.toString());
+        _size = 1;
+      }
     }
   }
 
@@ -177,7 +220,7 @@ public class DateTimeFormatSpec {
   /**
    * Validates the format string in the dateTimeFieldSpec
    */
-  public static void validateFormat(String format) {
+  public static String[] validateFormat(String format) {
     Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
     String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS);
     Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS && formatTokens.length <= MAX_FORMAT_TOKENS,
@@ -200,6 +243,35 @@ public class DateTimeFormatSpec {
               formatTokens[FORMAT_TIMEFORMAT_POSITION], format);
       DateTimeFormatPatternSpec.validateFormat(formatTokens[FORMAT_PATTERN_POSITION]);
     }
+    return formatTokens;
+  }
+
+  /**
+   * Validates the pipe format string in the dateTimeFieldSpec and returns its tokens; throws if the format is invalid
+   */
+  public static String[] validatePipeFormat(String format) {
+    Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
+    String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR, MAX_FORMAT_TOKENS_PIPE);
+    Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS_PIPE
+            && formatTokens.length <= MAX_FORMAT_TOKENS_PIPE,
+        "Incorrect format: %s. Must be of the format 'EPOCH|<timeUnit>(|<size>)'"
+            + " or 'SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)' or 'TIMESTAMP'", format);
+    String timeFormat = formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE];
+    if (formatTokens.length == MIN_FORMAT_TOKENS_PIPE) {
+      Preconditions.checkState(timeFormat.equals(TimeFormat.TIMESTAMP.toString()),
+          "Incorrect format type: %s. Must be of TIMESTAMP", timeFormat);
+    } else {
+      boolean isEpoch = timeFormat.equals(TimeFormat.EPOCH.toString());
+      Preconditions.checkState(isEpoch || timeFormat.equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()),
+          "Incorrect format: %s. Must be of 'EPOCH|<timeUnit>(|<size>)'"
+              + " or 'SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)'", format);
+      if (isEpoch && formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
+        Preconditions.checkState(formatTokens[EPOCH_SIZE_POSITION].matches(NUMBER_REGEX),
+            "Incorrect format size: %s in format: %s. Must be of format 'EPOCH|<timeUnit>|[0-9]+'",
+            formatTokens[EPOCH_SIZE_POSITION], format);
+      }
+    }
+    return formatTokens;
   }
 
   @Override


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org