You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2022/06/02 18:22:40 UTC
[pinot] branch master updated: Change DateTimeFormatSpec delimiter (#8779)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 7a311428bc Change DateTimeFormatSpec delimiter (#8779)
7a311428bc is described below
commit 7a311428bcba238514e8206a6c17a6b7bed8db14
Author: Satyam Raj <sa...@gmail.com>
AuthorDate: Thu Jun 2 23:52:34 2022 +0530
Change DateTimeFormatSpec delimiter (#8779)
Support for new DateTime format:
- EPOCH|<timeUnit>(|<size>)
- SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)
- TIMESTAMP
---
.../pinot/common/data/DateTimeFormatSpecTest.java | 126 +++++++++++++++++++++
.../pinot/spi/data/DateTimeFormatPatternSpec.java | 14 +++
.../apache/pinot/spi/data/DateTimeFormatSpec.java | 102 ++++++++++++++---
3 files changed, 227 insertions(+), 15 deletions(-)
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
index 22c359bcfa..5f33b0227a 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
@@ -75,6 +75,36 @@ public class DateTimeFormatSpecTest {
});
entries.add(new Object[]{"1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH Z", "20170701 00 -07:00", 1498892400000L});
entries.add(new Object[]{"1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a", "8/7/2017 12:45:50 AM", 1502066750000L});
+ entries.add(new Object[]{"EPOCH|HOURS|1", "416359", 1498892400000L});
+ entries.add(new Object[]{"EPOCH|HOURS", "416359", 1498892400000L});
+ entries.add(new Object[]{"EPOCH|MILLISECONDS|1", "1498892400000", 1498892400000L});
+ entries.add(new Object[]{"EPOCH|MILLISECONDS", "1498892400000", 1498892400000L});
+ entries.add(new Object[]{"EPOCH|HOURS|1", "0", 0L});
+ entries.add(new Object[]{"EPOCH|HOURS", "0", 0L});
+ entries.add(new Object[]{"EPOCH|MINUTES|5", "4996308", 1498892400000L});
+ entries.add(new Object[]{
+ "TIMESTAMP", "2017-07-01 00:00:00", Timestamp.valueOf("2017-07-01 00:00:00").getTime()
+ });
+ entries.add(new Object[]{"TIMESTAMP", "1498892400000", 1498892400000L});
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd", "20170701",
+ DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC().parseMillis("20170701")
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|America/Chicago", "20170701", DateTimeFormat.forPattern("yyyyMMdd")
+ .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Chicago"))).parseMillis("20170701")
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH", "20170701 00",
+ DateTimeFormat.forPattern("yyyyMMdd HH").withZoneUTC().parseMillis("20170701 00")
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH|GMT+0600", "20170701 00", DateTimeFormat.forPattern("yyyyMMdd HH")
+ .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("GMT+0600"))).parseMillis("20170701 00")
+ });
+ entries.add(new Object[]{"SIMPLE_DATE_FORMAT|yyyyMMdd HH Z", "20170701 00 -07:00", 1498892400000L});
+ entries.add(new Object[]{"SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", "8/7/2017 12:45:50 AM", 1502066750000L});
+
return entries.toArray(new Object[entries.size()][]);
}
@@ -129,6 +159,47 @@ public class DateTimeFormatSpecTest {
"1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h a", 1502066750000L,
DateTimeFormat.forPattern("M/d/yyyy h a").withZoneUTC().withLocale(Locale.ENGLISH).print(1502066750000L)
});
+ entries.add(new Object[]{"EPOCH|HOURS|1", 1498892400000L, "416359"});
+ entries.add(new Object[]{"EPOCH|MILLISECONDS|1", 1498892400000L, "1498892400000"});
+ entries.add(new Object[]{"EPOCH|HOURS|1", 0L, "0"});
+ entries.add(new Object[]{"EPOCH|MINUTES|5", 1498892400000L, "4996308"});
+ entries.add(new Object[]{
+ "TIMESTAMP", Timestamp.valueOf("2017-07-01 00:00:00").getTime(), "2017-07-01 00:00:00.0"
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd", 1498892400000L,
+ DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC().print(1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|America/New_York", 1498892400000L, DateTimeFormat.forPattern("yyyyMMdd")
+ .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/New_York"))).print(1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1498892400000L,
+ DateTimeFormat.forPattern("yyyyMMdd HH").withZoneUTC().print(1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH|IST", 1498892400000L,
+ DateTimeFormat.forPattern("yyyyMMdd HH").withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))).print(
+ 1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH Z", 1498892400000L,
+ DateTimeFormat.forPattern("yyyyMMdd HH Z").withZoneUTC().print(1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH Z|GMT+0500", 1498892400000L,
+ DateTimeFormat.forPattern("yyyyMMdd HH Z")
+ .withZone(DateTimeZone.forTimeZone(TimeZone.getTimeZone("GMT+0500"))).print(1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1498892400000L,
+ DateTimeFormat.forPattern("M/d/yyyy h:mm:ss a").withZoneUTC().withLocale(Locale.ENGLISH).print(1498892400000L)
+ });
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|M/d/yyyy h a", 1502066750000L,
+ DateTimeFormat.forPattern("M/d/yyyy h a").withZoneUTC().withLocale(Locale.ENGLISH).print(1502066750000L)
+ });
return entries.toArray(new Object[entries.size()][]);
}
@@ -219,6 +290,61 @@ public class DateTimeFormatSpecTest {
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo"))
});
+
+ //test new format
+ entries.add(
+ new Object[]{"TIMESTAMP", 1, TimeUnit.MILLISECONDS, TimeFormat.TIMESTAMP, null,
+ DateTimeZone.UTC});
+
+ entries.add(
+ new Object[]{"EPOCH|HOURS|1", 1, TimeUnit.HOURS, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC});
+
+ entries.add(new Object[]{
+ "EPOCH|MINUTES|5", 5, TimeUnit.MINUTES, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "yyyyMMdd", DateTimeZone.UTC
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
+ DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
+ DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "yyyyMMdd HH", DateTimeZone.UTC
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH|dummy", 1, TimeUnit.DAYS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1, TimeUnit.DAYS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.UTC
+ });
+
+ entries.add(new Object[]{
+ "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|Asia/Tokyo", 1, TimeUnit.DAYS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a",
+ DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo"))
+ });
return entries.toArray(new Object[entries.size()][]);
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
index 889202490e..f237270e2a 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
@@ -65,6 +65,20 @@ public class DateTimeFormatPatternSpec {
}
}
+  /**
+   * Creates a pattern spec from an already-parsed time format plus an optional pattern and time zone id.
+   *
+   * <p>The pattern and the time zone are only consulted when the time format is SIMPLE_DATE_FORMAT; for every
+   * other time format only the time format itself is recorded.
+   *
+   * @param timeFormat the time format of this spec
+   * @param sdfPattern the date-time pattern, used only for SIMPLE_DATE_FORMAT
+   * @param timeZone the time zone id, used only for SIMPLE_DATE_FORMAT; when null the current value of
+   *     {@code _dateTimeZone} is left untouched
+   */
+  public DateTimeFormatPatternSpec(DateTimeFieldSpec.TimeFormat timeFormat, @Nullable String sdfPattern,
+      @Nullable String timeZone) {
+    _timeFormat = timeFormat;
+    boolean isSimpleDateFormat = _timeFormat.equals(DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT);
+    if (!isSimpleDateFormat) {
+      // Nothing else to set up for formats that carry no pattern
+      return;
+    }
+    if (timeZone != null) {
+      TimeZone javaTimeZone = TimeZone.getTimeZone(timeZone);
+      _dateTimeZone = DateTimeZone.forTimeZone(javaTimeZone);
+    }
+    _dateTimeFormatter = DateTimeFormat.forPattern(sdfPattern)
+        .withZone(_dateTimeZone)
+        .withLocale(DEFAULT_LOCALE);
+    _sdfPattern = sdfPattern;
+  }
+
public DateTimeFieldSpec.TimeFormat getTimeFormat() {
return _timeFormat;
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
index c8083f9b1e..0a1de16214 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
@@ -37,6 +37,7 @@ public class DateTimeFormatSpec {
public static final String NUMBER_REGEX = "[1-9][0-9]*";
public static final String COLON_SEPARATOR = ":";
+ public static final String PIPE_SEPARATOR = "|";
/* DateTimeFieldSpec format is of format size:timeUnit:timeformat:pattern tz(timezone)
* tz(timezone) is optional. If not specified, UTC timezone is used */
@@ -47,6 +48,18 @@ public class DateTimeFormatSpec {
public static final int MIN_FORMAT_TOKENS = 3;
public static final int MAX_FORMAT_TOKENS = 4;
+ public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
+ public static final int MIN_FORMAT_TOKENS_PIPE = 1;
+ public static final int MAX_FORMAT_TOKENS_PIPE = 3;
+
+ // Applicable for SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)
+ public static final int SDF_PATTERN_POSITION = 1;
+ public static final int SDF_TIMEZONE_POSITION = 2;
+
+ // Applicable for EPOCH|<timeUnit>(|<size>)
+ public static final int EPOCH_UNIT_POSITION = 1;
+ public static final int EPOCH_SIZE_POSITION = 2;
+
private final String _format;
private final int _size;
private final DateTimeFormatUnitSpec _unitSpec;
@@ -54,21 +67,51 @@ public class DateTimeFormatSpec {
+  /**
+   * Parses a dateTimeFieldSpec format string. A string starting with a digit is treated as the colon-delimited
+   * format and checked with {@link #validateFormat(String)}; anything else is treated as the pipe-delimited
+   * format and checked with {@link #validatePipeFormat(String)}.
+   *
+   * @param format the format string, must be neither null nor empty
+   * @throws NullPointerException if {@code format} is null
+   * @throws IllegalStateException if {@code format} is empty or is rejected by the validation methods
+   */
   public DateTimeFormatSpec(String format) {
+    // Guard before the charAt(0) dispatch below: without it a null/empty format fails with a bare
+    // NullPointerException / StringIndexOutOfBoundsException instead of a descriptive validation error
+    Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
+    Preconditions.checkState(!format.isEmpty(), "Format string in dateTimeFieldSpec must not be empty");
     _format = format;
-    validateFormat(format);
-    String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS);
-    if (formatTokens.length == MAX_FORMAT_TOKENS) {
-      _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
-          formatTokens[FORMAT_PATTERN_POSITION]);
+    if (Character.isDigit(_format.charAt(0))) {
+      String[] formatTokens = validateFormat(format);
+      if (formatTokens.length == MAX_FORMAT_TOKENS) {
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
+            formatTokens[FORMAT_PATTERN_POSITION]);
+      } else {
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
+      }
+      if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
+        // TIMESTAMP type stores millis since epoch
+        _size = 1;
+        _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
+      } else {
+        _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
+        _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+      }
     } else {
-      _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
-    }
-    if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
-      // TIMESTAMP type stores millis since epoch
-      _size = 1;
-      _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
-    } else {
-      _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
-      _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+      String[] formatTokens = validatePipeFormat(format);
+      if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.EPOCH.toString())) {
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
+        _unitSpec = new DateTimeFormatUnitSpec(formatTokens[EPOCH_UNIT_POSITION]);
+        // The size token is optional for EPOCH and defaults to 1
+        if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
+          _size = Integer.parseInt(formatTokens[EPOCH_SIZE_POSITION]);
+        } else {
+          _size = 1;
+        }
+      } else if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString())) {
+        // The time zone token is optional for SIMPLE_DATE_FORMAT; null is passed when it is absent
+        if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
+          _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf(
+              formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]),
+              formatTokens[SDF_PATTERN_POSITION],
+              formatTokens[SDF_TIMEZONE_POSITION]);
+        } else {
+          _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf(
+              formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]),
+              formatTokens[SDF_PATTERN_POSITION], null);
+        }
+        // The pipe format carries no unit/size for SIMPLE_DATE_FORMAT, so a fixed 1:DAYS is recorded
+        _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.DAYS.toString());
+        _size = 1;
+      } else {
+        // Applicable for TIMESTAMP format
+        _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
+        _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.MILLISECONDS.toString());
+        _size = 1;
+      }
     }
   }
@@ -177,7 +220,7 @@ public class DateTimeFormatSpec {
/**
* Validates the format string in the dateTimeFieldSpec
*/
- public static void validateFormat(String format) {
+ public static String[] validateFormat(String format) {
Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS);
Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS && formatTokens.length <= MAX_FORMAT_TOKENS,
@@ -200,6 +243,35 @@ public class DateTimeFormatSpec {
formatTokens[FORMAT_TIMEFORMAT_POSITION], format);
DateTimeFormatPatternSpec.validateFormat(formatTokens[FORMAT_PATTERN_POSITION]);
}
+ return formatTokens;
+ }
+
+ /**
+ * Validates the pipe format string in the dateTimeFieldSpec
+ */
+ public static String[] validatePipeFormat(String format) {
+ Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
+ String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR, MAX_FORMAT_TOKENS_PIPE);
+ Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS_PIPE
+ && formatTokens.length <= MAX_FORMAT_TOKENS_PIPE,
+ "Incorrect format: %s. Must be of the format 'EPOCH|<timeUnit>(|<size>)'"
+ + " or 'SDF|<timeFormat>(|<timezone>)' or 'TIMESTAMP'");
+ if (formatTokens.length == MIN_FORMAT_TOKENS_PIPE) {
+ Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.TIMESTAMP.toString()),
+ "Incorrect format type: %s. Must be of TIMESTAMP", formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
+ } else {
+ Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.EPOCH.toString())
+ || formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()),
+ "Incorrect format %s. Must be of 'EPOCH|<timeUnit>(|<size>)' or" + "'SDF|<timeFormat>(|<timezone>)'");
+
+ if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE
+ && formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.EPOCH.toString())) {
+ Preconditions.checkState(formatTokens[EPOCH_SIZE_POSITION].matches(NUMBER_REGEX),
+ "Incorrect format size: %s in format: %s. Must be of format 'EPOCH|<timeUnit>|[0-9]+'",
+ formatTokens[EPOCH_SIZE_POSITION], format);
+ }
+ }
+ return formatTokens;
}
@Override
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org