You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2022/06/30 13:21:08 UTC
[pinot] branch master updated: Refactor DateTime field specs to reduce overhead (#8960)
This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 9735291f02 Refactor DateTime field specs to reduce overhead (#8960)
9735291f02 is described below
commit 9735291f025cb470c28baa085030ceaa02b761e3
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Thu Jun 30 06:21:01 2022 -0700
Refactor DateTime field specs to reduce overhead (#8960)
- Integrate the validation logic into the constructor to avoid the overhead of extra validation and regex match
- Remove the unnecessary format reconstruction
- Cache `DateTimeFormatSpec` and `DateTimeGranularitySpec` within `DateTimeFieldSpec` to avoid parsing the spec string multiple times
- Use the newly introduced pattern format in #8632
- This can improve the performance of the `dateTimeConvert` function
---
.../routing/segmentpruner/TimeSegmentPruner.java | 2 +-
.../routing/timeboundary/TimeBoundaryManager.java | 4 +-
.../broker/broker/HelixBrokerStarterTest.java | 7 +-
.../routing/segmentpruner/SegmentPrunerTest.java | 3 +-
.../pinot/common/data/DateTimeFormatSpecTest.java | 87 ++----
.../apache/pinot/common/data/FieldSpecTest.java | 2 +-
pinot-common/src/test/resources/schemaTest.schema | 2 +-
.../java/org/apache/pinot/compat/StreamOp.java | 3 +-
.../controller/util/AutoAddInvertedIndex.java | 3 +-
.../realtime/HLRealtimeSegmentDataManager.java | 3 +-
.../processing/timehandler/EpochTimeHandler.java | 2 +-
.../apache/pinot/core/util/SchemaUtilsTest.java | 77 +++--
.../batch/common/SegmentGenerationTaskRunner.java | 2 +-
.../hadoop/job/mappers/SegmentCreationMapper.java | 2 +-
.../job/mappers/SegmentPreprocessingMapper.java | 37 ++-
.../job/preprocess/DataPreprocessingHelper.java | 2 +-
.../spark/jobs/SparkSegmentCreationFunction.java | 11 +-
.../pinot/plugin/inputformat/avro/AvroUtils.java | 10 +-
.../recordtransformer/NullValueTransformer.java | 5 +-
.../pinot/segment/local/utils/IngestionUtils.java | 2 +-
.../pinot/segment/local/utils/SchemaUtils.java | 41 +--
.../local/segment/readers/PinotSegmentUtil.java | 4 +-
.../spi/creator/SegmentGeneratorConfig.java | 2 +-
.../creator/name/SegmentNameGeneratorFactory.java | 2 +-
.../NormalizedDateSegmentNameGeneratorTest.java | 32 +-
.../apache/pinot/spi/data/DateTimeFieldSpec.java | 32 +-
.../pinot/spi/data/DateTimeFormatPatternSpec.java | 120 +++----
.../apache/pinot/spi/data/DateTimeFormatSpec.java | 346 +++++++++++----------
.../pinot/spi/data/DateTimeFormatUnitSpec.java | 35 +--
.../pinot/spi/data/DateTimeGranularitySpec.java | 98 +++---
.../java/org/apache/pinot/spi/data/Schema.java | 28 +-
.../java/org/apache/pinot/spi/utils/JsonUtils.java | 10 +-
.../org/apache/pinot/spi/utils/StringUtil.java | 32 ++
.../spi/data/DateTimeFormatPatternSpecTest.java | 72 ++++-
.../pinot/spi/data/DateTimeFormatSpecTest.java | 73 ++++-
35 files changed, 651 insertions(+), 542 deletions(-)
diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java
index ff9df4e004..0b57c38422 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentpruner/TimeSegmentPruner.java
@@ -85,7 +85,7 @@ public class TimeSegmentPruner implements SegmentPruner {
DateTimeFieldSpec dateTimeSpec = schema.getSpecForTimeColumn(_timeColumn);
Preconditions.checkNotNull(dateTimeSpec, "Field spec must be specified in schema for time column: %s of table: %s",
_timeColumn, _tableNameWithType);
- _timeFormatSpec = new DateTimeFormatSpec(dateTimeSpec.getFormat());
+ _timeFormatSpec = dateTimeSpec.getFormatSpec();
}
@Override
diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java
index e781443c1c..10ae73b18a 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/timeboundary/TimeBoundaryManager.java
@@ -78,7 +78,7 @@ public class TimeBoundaryManager {
DateTimeFieldSpec dateTimeSpec = schema.getSpecForTimeColumn(_timeColumn);
Preconditions.checkNotNull(dateTimeSpec, "Field spec must be specified in schema for time column: %s of table: %s",
_timeColumn, _offlineTableName);
- _timeFormatSpec = new DateTimeFormatSpec(dateTimeSpec.getFormat());
+ _timeFormatSpec = dateTimeSpec.getFormatSpec();
Preconditions.checkNotNull(_timeFormatSpec.getColumnUnit(),
"Time unit must be configured in the field spec for time column: %s of table: %s", _timeColumn,
_offlineTableName);
@@ -91,7 +91,7 @@ public class TimeBoundaryManager {
_timeOffsetMs = isHourlyTable ? TimeUnit.HOURS.toMillis(1) : TimeUnit.DAYS.toMillis(1);
LOGGER.info("Constructed TimeBoundaryManager with timeColumn: {}, timeFormat: {}, isHourlyTable: {} for table: {}",
- _timeColumn, _timeFormatSpec.getFormat(), isHourlyTable, _offlineTableName);
+ _timeColumn, dateTimeSpec.getFormat(), isHourlyTable, _offlineTableName);
}
/**
diff --git a/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java b/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java
index ca5cc76de7..ddab2f7bf9 100644
--- a/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java
+++ b/pinot-broker/src/test/java/org/apache/pinot/broker/broker/HelixBrokerStarterTest.java
@@ -35,9 +35,6 @@ import org.apache.pinot.controller.utils.SegmentMetadataMockUtils;
import org.apache.pinot.core.routing.RoutingTable;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
-import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
-import org.apache.pinot.spi.data.DateTimeGranularitySpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.env.PinotConfiguration;
@@ -90,9 +87,7 @@ public class HelixBrokerStarterTest extends ControllerTest {
addFakeServerInstancesToAutoJoinHelixCluster(NUM_SERVERS, true);
Schema schema = new Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME)
- .addDateTime(TIME_COLUMN_NAME, FieldSpec.DataType.INT, new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(),
- DateTimeFieldSpec.TimeFormat.EPOCH.toString()).getFormat(),
- new DateTimeGranularitySpec(1, TimeUnit.DAYS).getGranularity()).build();
+ .addDateTime(TIME_COLUMN_NAME, FieldSpec.DataType.INT, "EPOCH|DAYS", "1:DAYS").build();
_helixResourceManager.addSchema(schema, true);
TableConfig offlineTableConfig =
new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).setTimeColumnName(TIME_COLUMN_NAME)
diff --git a/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java b/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java
index 53ef6fc6e3..70d41313bc 100644
--- a/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java
+++ b/pinot-broker/src/test/java/org/apache/pinot/broker/routing/segmentpruner/SegmentPrunerTest.java
@@ -543,8 +543,7 @@ public class SegmentPrunerTest extends ControllerTest {
TimeSegmentPruner segmentPruner = new TimeSegmentPruner(tableConfig, _propertyStore);
Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, RAW_TABLE_NAME);
- DateTimeFormatSpec dateTimeFormatSpec =
- new DateTimeFormatSpec(schema.getSpecForTimeColumn(TIME_COLUMN).getFormat());
+ DateTimeFormatSpec dateTimeFormatSpec = schema.getSpecForTimeColumn(TIME_COLUMN).getFormatSpec();
Set<String> onlineSegments = new HashSet<>();
String segment0 = "segment0";
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
index 5f33b0227a..3a1ad600ce 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/data/DateTimeFormatSpecTest.java
@@ -249,44 +249,45 @@ public class DateTimeFormatSpecTest {
});
entries.add(new Object[]{
- "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
"yyyyMMdd", DateTimeZone.UTC
});
entries.add(new Object[]{
- "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
- "yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
+ "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.MILLISECONDS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", DateTimeZone.forTimeZone(
+ TimeZone.getTimeZone("IST"))
});
entries.add(new Object[]{
- "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.DAYS,
+ "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(IST)", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
});
entries.add(new Object[]{
- "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz ( IST ) ", 1, TimeUnit.DAYS,
+ "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd tz ( IST ) ", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
});
entries.add(new Object[]{
- "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH", 1, TimeUnit.HOURS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
- "yyyyMMdd HH", DateTimeZone.UTC
+ "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH", 1, TimeUnit.MILLISECONDS,
+ DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC
});
entries.add(new Object[]{
- "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH tz(dummy)", 1, TimeUnit.HOURS,
+ "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd HH tz(dummy)", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC
});
entries.add(new Object[]{
- "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a", 1, TimeUnit.HOURS,
+ "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.UTC
});
entries.add(new Object[]{
- "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a tz(Asia/Tokyo)", 1, TimeUnit.HOURS,
+ "1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a tz(Asia/Tokyo)", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo"))
});
@@ -304,97 +305,47 @@ public class DateTimeFormatSpecTest {
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
"yyyyMMdd", DateTimeZone.UTC
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
"yyyyMMdd", DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS,
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.DAYS,
+ "SIMPLE_DATE_FORMAT|yyyyMMdd|IST", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("IST"))
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1, TimeUnit.DAYS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
"yyyyMMdd HH", DateTimeZone.UTC
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|yyyyMMdd HH|dummy", 1, TimeUnit.DAYS,
+ "SIMPLE_DATE_FORMAT|yyyyMMdd HH|dummy", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", DateTimeZone.UTC
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1, TimeUnit.DAYS,
+ "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a", DateTimeZone.UTC
});
entries.add(new Object[]{
- "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|Asia/Tokyo", 1, TimeUnit.DAYS,
+ "SIMPLE_DATE_FORMAT|M/d/yyyy h:mm:ss a|Asia/Tokyo", 1, TimeUnit.MILLISECONDS,
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT, "M/d/yyyy h:mm:ss a",
DateTimeZone.forTimeZone(TimeZone.getTimeZone("Asia/Tokyo"))
});
return entries.toArray(new Object[entries.size()][]);
}
-
- // Test construct format given its components
- @Test(dataProvider = "testConstructFormatDataProvider")
- public void testConstructFormat(int columnSize, TimeUnit columnUnit, String columnTimeFormat, String pattern,
- DateTimeFormatSpec formatExpected1, DateTimeFormatSpec formatExpected2) {
- DateTimeFormatSpec formatActual1 = null;
- try {
- formatActual1 = new DateTimeFormatSpec(columnSize, columnUnit.toString(), columnTimeFormat);
- } catch (Exception e) {
- // invalid arguments
- }
- Assert.assertEquals(formatActual1, formatExpected1);
-
- DateTimeFormatSpec formatActual2 = null;
- try {
- formatActual2 = new DateTimeFormatSpec(columnSize, columnUnit.toString(), columnTimeFormat, pattern);
- } catch (Exception e) {
- // invalid arguments
- }
- Assert.assertEquals(formatActual2, formatExpected2);
- }
-
- @DataProvider(name = "testConstructFormatDataProvider")
- public Object[][] provideTestConstructFormatData() {
-
- List<Object[]> entries = new ArrayList<>();
-
- entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", null, new DateTimeFormatSpec("1:HOURS:EPOCH"), null});
- entries.add(new Object[]{1, TimeUnit.HOURS, "EPOCH", "yyyyMMdd", new DateTimeFormatSpec("1:HOURS:EPOCH"), null});
- entries.add(new Object[]{5, TimeUnit.MINUTES, "EPOCH", null, new DateTimeFormatSpec("5:MINUTES:EPOCH"), null});
- entries.add(new Object[]{0, TimeUnit.HOURS, "EPOCH", null, null, null});
- entries.add(new Object[]{1, null, "EPOCH", null, null, null});
- entries.add(new Object[]{1, TimeUnit.HOURS, null, null, null, null});
- entries.add(new Object[]{1, TimeUnit.HOURS, "DUMMY", "yyyyMMdd", null, null});
- entries.add(new Object[]{
- 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMdd", null,
- new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd")
- });
- entries.add(new Object[]{
- 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMdd tz(America/Los_Angeles)", null,
- new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMdd tz(America/Los_Angeles)")
- });
- entries.add(new Object[]{1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", null, null, null});
- entries.add(new Object[]{-1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "yyyyMMDD", null, null});
- entries.add(new Object[]{
- 1, TimeUnit.HOURS, "SIMPLE_DATE_FORMAT", "M/d/yyyy h:mm:ss a", null,
- new DateTimeFormatSpec("1:HOURS:SIMPLE_DATE_FORMAT:M/d/yyyy h:mm:ss a")
- });
- return entries.toArray(new Object[entries.size()][]);
- }
}
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
index 1d2c92a8a8..0623068b35 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
@@ -245,7 +245,7 @@ public class FieldSpecTest {
boolean exceptionActual = false;
try {
dateTimeFieldActual = new DateTimeFieldSpec(name, dataType, format, granularity);
- } catch (IllegalStateException e) {
+ } catch (IllegalArgumentException e) {
exceptionActual = true;
}
Assert.assertEquals(exceptionActual, exceptionExpected);
diff --git a/pinot-common/src/test/resources/schemaTest.schema b/pinot-common/src/test/resources/schemaTest.schema
index d6c5704b2b..443b10e2d6 100644
--- a/pinot-common/src/test/resources/schemaTest.schema
+++ b/pinot-common/src/test/resources/schemaTest.schema
@@ -93,7 +93,7 @@
"name": "dateTime3",
"dataType": "TIMESTAMP",
"format": "1:MILLISECONDS:TIMESTAMP",
- "granularity": "1:SECOND"
+ "granularity": "1:SECONDS"
}
],
"schemaName": "schemaTest"
diff --git a/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java b/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java
index a50487e05b..ac69226211 100644
--- a/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java
+++ b/pinot-compatibility-verifier/src/main/java/org/apache/pinot/compat/StreamOp.java
@@ -226,8 +226,7 @@ public class StreamOp extends BaseOp {
.sendGetRequest(ControllerRequestURLBuilder.baseUrl(ClusterDescriptor.getInstance().getControllerUrl())
.forSchemaGet(schemaName));
Schema schema = JsonUtils.stringToObject(schemaString, Schema.class);
- DateTimeFormatSpec dateTimeFormatSpec =
- new DateTimeFormatSpec(schema.getSpecForTimeColumn(timeColumn).getFormat());
+ DateTimeFormatSpec dateTimeFormatSpec = schema.getSpecForTimeColumn(timeColumn).getFormatSpec();
try (RecordReader csvRecordReader = RecordReaderFactory
.getRecordReader(FileFormat.CSV, localReplacedCSVFile, columnNames, recordReaderConfig)) {
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java b/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java
index 89d06479f8..3d57ceb9fb 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/util/AutoAddInvertedIndex.java
@@ -41,7 +41,6 @@ import org.apache.pinot.common.metadata.ZKMetadataProvider;
import org.apache.pinot.spi.config.table.IndexingConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.utils.JsonUtils;
@@ -243,7 +242,7 @@ public class AutoAddInvertedIndex {
tableNameWithType);
continue;
}
- TimeUnit timeUnit = new DateTimeFormatSpec(dateTimeSpec.getFormat()).getColumnUnit();
+ TimeUnit timeUnit = dateTimeSpec.getFormatSpec().getColumnUnit();
if (timeUnit != TimeUnit.DAYS) {
LOGGER.warn("Table: {}, time column {] has non-DAYS time unit: {}", timeColumnName, timeUnit);
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java
index 3033508406..b84c449ab5 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/HLRealtimeSegmentDataManager.java
@@ -47,7 +47,6 @@ import org.apache.pinot.segment.spi.creator.SegmentVersion;
import org.apache.pinot.spi.config.table.IndexingConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.metrics.PinotMeter;
@@ -121,7 +120,7 @@ public class HLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
_tableNameWithType);
DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(_timeColumnName);
Preconditions.checkNotNull(dateTimeFieldSpec, "Must provide field spec for time column {}", _timeColumnName);
- _timeType = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()).getColumnUnit();
+ _timeType = dateTimeFieldSpec.getFormatSpec().getColumnUnit();
List<String> sortedColumns = indexLoadingConfig.getSortedColumns();
if (sortedColumns.isEmpty()) {
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java b/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java
index ccba8bfc9f..9e6ad822b5 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/segment/processing/timehandler/EpochTimeHandler.java
@@ -43,7 +43,7 @@ public class EpochTimeHandler implements TimeHandler {
long roundBucketMs, long partitionBucketMs) {
_timeColumn = fieldSpec.getName();
_dataType = fieldSpec.getDataType();
- _formatSpec = new DateTimeFormatSpec(fieldSpec.getFormat());
+ _formatSpec = fieldSpec.getFormatSpec();
_startTimeMs = startTimeMs;
_endTimeMs = endTimeMs;
_negateWindowFilter = negateWindowFilter;
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
index e8f29ad725..09dc8928b0 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
@@ -47,7 +47,6 @@ import static org.testng.Assert.assertThrows;
* Tests schema validations
*/
public class SchemaUtilsTest {
-
private static final String TABLE_NAME = "testTable";
private static final String TIME_COLUMN = "timeColumn";
@@ -209,21 +208,21 @@ public class SchemaUtilsTest {
public void testValidateTimeFieldSpec() {
Schema pinotSchema;
// time field spec using same name for incoming and outgoing
- pinotSchema = new Schema.SchemaBuilder()
- .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "time"),
+ pinotSchema =
+ new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "time"),
new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "time")).build();
checkValidationFails(pinotSchema);
// time field spec using SIMPLE_DATE_FORMAT, not allowed when conversion is needed
- pinotSchema = new Schema.SchemaBuilder()
- .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
+ pinotSchema =
+ new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS,
TimeGranularitySpec.TimeFormat.SIMPLE_DATE_FORMAT.toString(), "outgoing")).build();
checkValidationFails(pinotSchema);
// valid time field spec
- pinotSchema = new Schema.SchemaBuilder()
- .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
+ pinotSchema =
+ new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing")).build();
SchemaUtils.validate(pinotSchema);
}
@@ -232,19 +231,20 @@ public class SchemaUtilsTest {
public void testValidateDateTimeFieldSpec() {
Schema pinotSchema;
// valid date time.
- pinotSchema = new Schema.SchemaBuilder()
- .addDateTime("datetime1", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd", "1:DAYS")
+ pinotSchema = new Schema.SchemaBuilder().addDateTime("datetime1", FieldSpec.DataType.STRING,
+ "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd", "1:DAYS")
.addDateTime("datetime2", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-ww-dd", "1:DAYS")
.build();
SchemaUtils.validate(pinotSchema);
// date time field spec using SIMPLE_DATE_FORMAT needs to be valid.
- assertThrows(IllegalStateException.class, () -> new Schema.SchemaBuilder()
- .addDateTime("datetime3", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:foo_bar", "1:DAYS").build());
+ assertThrows(IllegalArgumentException.class,
+ () -> new Schema.SchemaBuilder().addDateTime("datetime3", FieldSpec.DataType.STRING,
+ "1:DAYS:SIMPLE_DATE_FORMAT:foo_bar", "1:DAYS").build());
// date time field spec using SIMPLE_DATE_FORMAT needs to be lexicographical order.
- pinotSchema = new Schema.SchemaBuilder()
- .addDateTime("datetime4", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:M/d/yyyy", "1:DAYS").build();
+ pinotSchema = new Schema.SchemaBuilder().addDateTime("datetime4", FieldSpec.DataType.STRING,
+ "1:DAYS:SIMPLE_DATE_FORMAT:M/d/yyyy", "1:DAYS").build();
checkValidationFails(pinotSchema);
}
@@ -252,17 +252,17 @@ public class SchemaUtilsTest {
public void testValidatePrimaryKeyColumns() {
Schema pinotSchema;
// non-existing column used as primary key
- pinotSchema = new Schema.SchemaBuilder()
- .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
- new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
- .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("test")).build();
+ pinotSchema =
+ new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
+ new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
+ .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("test")).build();
checkValidationFails(pinotSchema);
// valid primary key
- pinotSchema = new Schema.SchemaBuilder()
- .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
- new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
- .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("col")).build();
+ pinotSchema =
+ new Schema.SchemaBuilder().addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "incoming"),
+ new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
+ .addSingleValueDimension("col", DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("col")).build();
SchemaUtils.validate(pinotSchema);
}
@@ -298,54 +298,53 @@ public class SchemaUtilsTest {
@Test
public void testDateTimeFieldSpec()
throws IOException {
- Schema pinotSchema;
- pinotSchema = Schema.fromString(
+ Schema schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"x:HOURS:EPOCH\","
+ "\"granularity\":\"1:HOURS\"}]}");
- checkValidationFails(pinotSchema);
+ checkValidationFails(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DUMMY:EPOCH\","
+ "\"granularity\":\"1:HOURS\"}]}");
- checkValidationFails(pinotSchema);
+ checkValidationFails(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:DUMMY\","
+ "\"granularity\":\"1:HOURS\"}]}");
- checkValidationFails(pinotSchema);
+ checkValidationFails(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:EPOCH\","
+ "\"granularity\":\"x:HOURS\"}]}");
- checkValidationFails(pinotSchema);
+ checkValidationFails(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:EPOCH\","
+ "\"granularity\":\"1:DUMMY\"}]}");
- checkValidationFails(pinotSchema);
+ checkValidationFails(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\","
+ "\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT\",\"granularity\":\"1:DAYS\"}]}");
- checkValidationFails(pinotSchema);
+ checkValidationFails(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:HOURS:EPOCH\","
+ "\"granularity\":\"1:HOURS\"}]}");
- SchemaUtils.validate(pinotSchema);
+ SchemaUtils.validate(schema);
- pinotSchema = Schema.fromString(
+ schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\","
+ "\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
- SchemaUtils.validate(pinotSchema);
+ SchemaUtils.validate(schema);
}
/**
@@ -394,7 +393,7 @@ public class SchemaUtilsTest {
try {
SchemaUtils.validate(pinotSchema);
Assert.fail("Schema validation should have failed.");
- } catch (IllegalStateException e) {
+ } catch (IllegalArgumentException | IllegalStateException e) {
// expected
}
}
diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java
index 530bf2110c..847b92002d 100644
--- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java
+++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/src/main/java/org/apache/pinot/plugin/ingestion/batch/common/SegmentGenerationTaskRunner.java
@@ -156,7 +156,7 @@ public class SegmentGenerationTaskRunner implements Serializable {
if (timeColumnName != null) {
DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName);
if (dateTimeFieldSpec != null) {
- dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
+ dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec();
}
}
return new NormalizedDateSegmentNameGenerator(tableName, segmentNameGeneratorConfigs.get(SEGMENT_NAME_PREFIX),
diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java
index e2232f6891..5f5d510bbb 100644
--- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java
+++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentCreationMapper.java
@@ -158,7 +158,7 @@ public class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritab
if (timeColumnName != null) {
DateTimeFieldSpec dateTimeFieldSpec = _schema.getSpecForTimeColumn(timeColumnName);
if (dateTimeFieldSpec != null) {
- dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
+ dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec();
}
}
_segmentNameGenerator =
diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java
index aad0e63c5f..408081eb88 100644
--- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java
+++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/mappers/SegmentPreprocessingMapper.java
@@ -68,15 +68,36 @@ public class SegmentPreprocessingMapper
String timeColumnValue = _jobConf.get(InternalConfigConstants.TIME_COLUMN_VALUE);
String pushFrequency = _jobConf.get(InternalConfigConstants.SEGMENT_PUSH_FREQUENCY);
- String timeType = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_TYPE);
- String timeFormat = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_FORMAT);
+ String timeFormatStr = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_FORMAT);
+ DateTimeFieldSpec.TimeFormat timeFormat;
+ try {
+ timeFormat = DateTimeFieldSpec.TimeFormat.valueOf(timeFormatStr);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid time format: " + timeFormatStr);
+ }
DateTimeFormatSpec dateTimeFormatSpec;
- if (timeFormat.equals(DateTimeFieldSpec.TimeFormat.EPOCH.toString()) || timeFormat.equals(
- DateTimeFieldSpec.TimeFormat.TIMESTAMP.toString())) {
- dateTimeFormatSpec = new DateTimeFormatSpec(1, timeType, timeFormat);
- } else {
- dateTimeFormatSpec = new DateTimeFormatSpec(1, timeType, timeFormat,
- _jobConf.get(InternalConfigConstants.SEGMENT_TIME_SDF_PATTERN));
+ switch (timeFormat) {
+ case EPOCH:
+ String timeTypeStr = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_TYPE);
+ try {
+ dateTimeFormatSpec = DateTimeFormatSpec.forEpoch(timeTypeStr);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid time type: " + timeTypeStr, e);
+ }
+ break;
+ case TIMESTAMP:
+ dateTimeFormatSpec = DateTimeFormatSpec.forTimestamp();
+ break;
+ case SIMPLE_DATE_FORMAT:
+ String sdfPattern = _jobConf.get(InternalConfigConstants.SEGMENT_TIME_SDF_PATTERN);
+ try {
+ dateTimeFormatSpec = DateTimeFormatSpec.forSimpleDateFormat(sdfPattern);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid SDF pattern: " + sdfPattern, e);
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported time format: " + timeFormat);
}
_normalizedDateSegmentNameGenerator =
new NormalizedDateSegmentNameGenerator(tableName, null, false, "APPEND", pushFrequency, dateTimeFormatSpec,
diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java
index dc51e71e66..a9a0db40d6 100644
--- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java
+++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-hadoop/src/main/java/org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.java
@@ -201,7 +201,7 @@ public abstract class DataPreprocessingHelper {
if (timeColumnName != null) {
DateTimeFieldSpec dateTimeFieldSpec = _pinotTableSchema.getSpecForTimeColumn(timeColumnName);
if (dateTimeFieldSpec != null) {
- DateTimeFormatSpec formatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
+ DateTimeFormatSpec formatSpec = dateTimeFieldSpec.getFormatSpec();
job.getConfiguration().set(InternalConfigConstants.SEGMENT_TIME_TYPE, formatSpec.getColumnUnit().toString());
job.getConfiguration()
.set(InternalConfigConstants.SEGMENT_TIME_FORMAT, formatSpec.getTimeFormat().toString());
diff --git a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java
index a5c79215b8..0a1791e4df 100644
--- a/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java
+++ b/pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/main/java/org/apache/pinot/spark/jobs/SparkSegmentCreationFunction.java
@@ -125,7 +125,7 @@ public class SparkSegmentCreationFunction implements Serializable {
if (timeColumnName != null) {
DateTimeFieldSpec dateTimeFieldSpec = _schema.getSpecForTimeColumn(timeColumnName);
if (dateTimeFieldSpec != null) {
- dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
+ dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec();
}
}
_segmentNameGenerator =
@@ -150,8 +150,8 @@ public class SparkSegmentCreationFunction implements Serializable {
_logger.warn("Deleting existing file: {}", _localStagingDir);
FileUtils.forceDelete(_localStagingDir);
}
- _logger
- .info("Making local temporary directories: {}, {}, {}", _localStagingDir, _localInputDir, _localSegmentTarDir);
+ _logger.info("Making local temporary directories: {}, {}, {}", _localStagingDir, _localInputDir,
+ _localSegmentTarDir);
Preconditions.checkState(_localStagingDir.mkdirs());
Preconditions.checkState(_localInputDir.mkdir());
Preconditions.checkState(_localSegmentDir.mkdir());
@@ -250,8 +250,9 @@ public class SparkSegmentCreationFunction implements Serializable {
Path hdfsSegmentTarFile = new Path(_hdfsSegmentTarDir, segmentTarFileName);
if (_useRelativePath) {
- Path relativeOutputPath = SegmentCreationJob.getRelativeOutputPath(
- new Path(_jobConf.get(JobConfigConstants.PATH_TO_INPUT)).toUri(), hdfsInputFile.toUri(), _hdfsSegmentTarDir);
+ Path relativeOutputPath =
+ SegmentCreationJob.getRelativeOutputPath(new Path(_jobConf.get(JobConfigConstants.PATH_TO_INPUT)).toUri(),
+ hdfsInputFile.toUri(), _hdfsSegmentTarDir);
hdfsSegmentTarFile = new Path(relativeOutputPath, segmentTarFileName);
}
_logger.info("Copying segment tar file from: {} to: {}", localSegmentTarFile, hdfsSegmentTarFile);
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
index bebd903dbc..8c98f57a3f 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
@@ -34,8 +34,6 @@ import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
-import org.apache.pinot.spi.data.DateTimeGranularitySpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -369,9 +367,11 @@ public class AvroUtils {
case DATE_TIME:
Preconditions.checkState(isSingleValueField, "Time field: %s cannot be multi-valued", name);
Preconditions.checkNotNull(timeUnit, "Time unit cannot be null");
- pinotSchema.addField(new DateTimeFieldSpec(name, dataType,
- new DateTimeFormatSpec(1, timeUnit.toString(), DateTimeFieldSpec.TimeFormat.EPOCH.toString()).getFormat(),
- new DateTimeGranularitySpec(1, timeUnit).getGranularity()));
+ // TODO: Switch to new format after releasing 0.11.0
+ // "EPOCH|" + timeUnit.name()
+ String format = "1:" + timeUnit.name() + ":EPOCH";
+ String granularity = "1:" + timeUnit.name();
+ pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format, granularity));
break;
default:
throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + name);
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java
index 605ce80134..823f449a00 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/NullValueTransformer.java
@@ -62,8 +62,7 @@ public class NullValueTransformer implements RecordTransformer {
schema.getSchemaName());
String defaultTimeString = timeColumnSpec.getDefaultNullValueString();
- String timeFormat = timeColumnSpec.getFormat();
- DateTimeFormatSpec dateTimeFormatSpec = new DateTimeFormatSpec(timeFormat);
+ DateTimeFormatSpec dateTimeFormatSpec = timeColumnSpec.getFormatSpec();
try {
long defaultTimeMs = dateTimeFormatSpec.fromFormatToMillis(defaultTimeString);
if (TimeUtils.timeValueInValidRange(defaultTimeMs)) {
@@ -78,7 +77,7 @@ public class NullValueTransformer implements RecordTransformer {
_defaultNullValues.put(timeColumnName, currentTime);
LOGGER.info(
"Default time: {} does not comply with format: {}, using current time: {} as the default time for table: {}",
- defaultTimeString, timeFormat, currentTime, tableConfig.getTableName());
+ defaultTimeString, timeColumnSpec.getFormat(), currentTime, tableConfig.getTableName());
}
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java
index 2953646364..72aadb70cb 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/IngestionUtils.java
@@ -158,7 +158,7 @@ public final class IngestionUtils {
if (timeColumnName != null) {
DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName);
if (dateTimeFieldSpec != null) {
- dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
+ dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec();
}
}
return new NormalizedDateSegmentNameGenerator(rawTableName, batchConfig.getSegmentNamePrefix(),
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
index 3b4e5183bd..337531c3e6 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SchemaUtils.java
@@ -48,6 +48,7 @@ public class SchemaUtils {
// checker to ensure simple date format matches lexicographic ordering.
private static final Map<Character, Integer> DATETIME_PATTERN_ORDERING = new HashMap<>();
+
static {
char[] patternOrdering = new char[]{'y', 'M', 'd', 'H', 'm', 's', 'S'};
for (int i = 0; i < patternOrdering.length; i++) {
@@ -120,10 +121,10 @@ public class SchemaUtils {
}
}
if (fieldSpec.getFieldType().equals(FieldSpec.FieldType.TIME)) {
- validateTimeFieldSpec(fieldSpec);
+ validateTimeFieldSpec((TimeFieldSpec) fieldSpec);
}
if (fieldSpec.getFieldType().equals(FieldSpec.FieldType.DATE_TIME)) {
- validateDateTimeFieldSpec(fieldSpec);
+ validateDateTimeFieldSpec((DateTimeFieldSpec) fieldSpec);
}
}
}
@@ -132,8 +133,8 @@ public class SchemaUtils {
transformedColumns.retainAll(argumentColumns));
if (schema.getPrimaryKeyColumns() != null) {
for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) {
- Preconditions
- .checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn), "The primary key column must exist");
+ Preconditions.checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn),
+ "The primary key column must exist");
}
}
}
@@ -154,8 +155,7 @@ public class SchemaUtils {
/**
* Checks for valid incoming and outgoing granularity spec in the time field spec
*/
- private static void validateTimeFieldSpec(FieldSpec fieldSpec) {
- TimeFieldSpec timeFieldSpec = (TimeFieldSpec) fieldSpec;
+ private static void validateTimeFieldSpec(TimeFieldSpec timeFieldSpec) {
TimeGranularitySpec incomingGranularitySpec = timeFieldSpec.getIncomingGranularitySpec();
TimeGranularitySpec outgoingGranularitySpec = timeFieldSpec.getOutgoingGranularitySpec();
@@ -167,28 +167,21 @@ public class SchemaUtils {
Preconditions.checkState(
incomingGranularitySpec.getTimeFormat().equals(TimeGranularitySpec.TimeFormat.EPOCH.toString())
&& outgoingGranularitySpec.getTimeFormat().equals(TimeGranularitySpec.TimeFormat.EPOCH.toString()),
- "Cannot perform time conversion for time format other than EPOCH. TimeFieldSpec: %s", fieldSpec);
+ "Cannot perform time conversion for time format other than EPOCH. TimeFieldSpec: %s", timeFieldSpec);
}
}
/**
* Checks for valid format and granularity string in dateTimeFieldSpec
*/
- private static void validateDateTimeFieldSpec(FieldSpec fieldSpec) {
- DateTimeFieldSpec dateTimeFieldSpec = (DateTimeFieldSpec) fieldSpec;
- validateDateTimeFormat(dateTimeFieldSpec.getFormat());
- DateTimeGranularitySpec.validateGranularity(dateTimeFieldSpec.getGranularity());
- }
-
- private static void validateDateTimeFormat(String format) {
- DateTimeFormatSpec dateTimeFormatSpec;
+ private static void validateDateTimeFieldSpec(DateTimeFieldSpec dateTimeFieldSpec) {
+ DateTimeFormatSpec formatSpec;
try {
- dateTimeFormatSpec = new DateTimeFormatSpec(format);
+ formatSpec = dateTimeFieldSpec.getFormatSpec();
} catch (Exception e) {
- throw new IllegalStateException(String.format("invalid datetime format: %s", format), e);
+ throw new IllegalArgumentException("Invalid format: " + dateTimeFieldSpec.getFormat(), e);
}
- // validate the format is correct.
- String sdfPattern = dateTimeFormatSpec.getSDFPattern();
+ String sdfPattern = formatSpec.getSDFPattern();
if (sdfPattern != null) {
// must be in "yyyy MM dd HH mm ss SSS" to make sure it is sorted by both lexicographical and datetime order.
int[] maxIndexes = new int[]{-1, -1, -1, -1, -1, -1, -1, -1};
@@ -198,11 +191,19 @@ public class SchemaUtils {
}
// last index doesn't need to be checked.
for (int idx = 0; idx < maxIndexes.length - 2; idx++) {
- Preconditions.checkState(maxIndexes[idx] <= maxIndexes[idx + 1] || maxIndexes[idx + 1] == -1,
+ Preconditions.checkArgument(maxIndexes[idx] <= maxIndexes[idx + 1] || maxIndexes[idx + 1] == -1,
String.format("SIMPLE_DATE_FORMAT pattern %s has to be sorted by both lexicographical and datetime order",
sdfPattern));
maxIndexes[idx + 1] = Math.max(maxIndexes[idx + 1], maxIndexes[idx]);
}
}
+
+ DateTimeGranularitySpec granularitySpec;
+ try {
+ granularitySpec = dateTimeFieldSpec.getGranularitySpec();
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid granularity: " + dateTimeFieldSpec.getGranularity(), e);
+ }
+ Preconditions.checkNotNull(granularitySpec);
}
}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java
index ab73f25e00..cf61964f7c 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/readers/PinotSegmentUtil.java
@@ -33,7 +33,6 @@ import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationD
import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.Schema;
@@ -111,8 +110,7 @@ public class PinotSegmentUtil {
TimeUnit unit = timeFieldSpec.getIncomingGranularitySpec().getTimeType();
return generateTimeValue(random, unit);
} else if (fieldSpec instanceof DateTimeFieldSpec) {
- DateTimeFieldSpec dateTimeFieldSpec = (DateTimeFieldSpec) fieldSpec;
- TimeUnit unit = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()).getColumnUnit();
+ TimeUnit unit = ((DateTimeFieldSpec) fieldSpec).getFormatSpec().getColumnUnit();
return generateTimeValue(random, unit);
} else {
DataType storedType = fieldSpec.getDataType().getStoredType();
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
index e28d566c86..d81e69e18d 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -264,7 +264,7 @@ public class SegmentGeneratorConfig implements Serializable {
DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName);
if (dateTimeFieldSpec != null) {
setTimeColumnName(dateTimeFieldSpec.getName());
- setDateTimeFormatSpec(new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()));
+ setDateTimeFormatSpec(dateTimeFieldSpec.getFormatSpec());
}
}
}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java
index 0bd4c3c6cc..9fd6c97f35 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGeneratorFactory.java
@@ -63,7 +63,7 @@ public class SegmentNameGeneratorFactory {
DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName);
Preconditions.checkNotNull(dateTimeFieldSpec,
"Schema does not contain the time column specified in the table config.");
- dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
+ dateTimeFormatSpec = dateTimeFieldSpec.getFormatSpec();
}
return new NormalizedDateSegmentNameGenerator(tableName, prefix, excludeSequenceId,
IngestionConfigUtils.getBatchSegmentIngestionType(tableConfig),
diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java
index 3316929502..0fef3e6991 100644
--- a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java
+++ b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGeneratorTest.java
@@ -35,8 +35,6 @@ public class NormalizedDateSegmentNameGeneratorTest {
private static final String MALFORMED_SEGMENT_NAME_POSTFIX = "my\\postfix";
private static final String APPEND_PUSH_TYPE = "APPEND";
private static final String REFRESH_PUSH_TYPE = "REFRESH";
- private static final String EPOCH_TIME_FORMAT = "EPOCH";
- private static final String SIMPLE_DATE_TIME_FORMAT = "SIMPLE_DATE_FORMAT";
private static final String LONG_SIMPLE_DATE_FORMAT = "yyyyMMdd";
private static final String STRING_SIMPLE_DATE_FORMAT = "yyyy-MM-dd";
private static final String STRING_SLASH_DATE_FORMAT = "yyyy/MM/dd";
@@ -139,7 +137,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testAppend() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), null);
+ DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), null);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, "
+ "inputTimeUnit=DAYS");
@@ -153,7 +151,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testAppendWithSegmentNamePrefix() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE,
- DAILY_PUSH_FREQUENCY, new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), null);
+ DAILY_PUSH_FREQUENCY, DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), null);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable_daily, appendPushType=true, "
+ "outputSDF=yyyy-MM-dd, inputTimeUnit=DAYS");
@@ -167,8 +165,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testAppendWithSegmentNamePrefixPostfix() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE,
- DAILY_PUSH_FREQUENCY, new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT),
- SEGMENT_NAME_POSTFIX);
+ DAILY_PUSH_FREQUENCY, DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), SEGMENT_NAME_POSTFIX);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable_daily, segmentNamePostfix=myPostfix, "
+ "appendPushType=true, outputSDF=yyyy-MM-dd, inputTimeUnit=DAYS");
@@ -182,7 +179,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testHoursTimeType() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.HOURS.toString(), EPOCH_TIME_FORMAT), null);
+ DateTimeFormatSpec.forEpoch(TimeUnit.HOURS.name()), null);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, "
+ "inputTimeUnit=HOURS");
@@ -196,8 +193,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testLongSimpleDateFormat() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, LONG_SIMPLE_DATE_FORMAT),
- null);
+ DateTimeFormatSpec.forSimpleDateFormat(LONG_SIMPLE_DATE_FORMAT), null);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, "
+ "inputSDF=yyyyMMdd");
@@ -211,8 +207,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testStringSimpleDateFormat() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SIMPLE_DATE_FORMAT),
- null);
+ DateTimeFormatSpec.forSimpleDateFormat(STRING_SIMPLE_DATE_FORMAT), null);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd, "
+ "inputSDF=yyyy-MM-dd");
@@ -224,26 +219,24 @@ public class NormalizedDateSegmentNameGeneratorTest {
@Test
public void testMalFormedTableNameAndSegmentNamePrefixPostfix() {
+ DateTimeFormatSpec dateTimeFormatSpec = DateTimeFormatSpec.forSimpleDateFormat(STRING_SLASH_DATE_FORMAT);
try {
new NormalizedDateSegmentNameGenerator(MALFORMED_TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT), null);
+ dateTimeFormatSpec, null);
Assert.fail();
} catch (IllegalArgumentException e) {
// Expected
}
try {
new NormalizedDateSegmentNameGenerator(TABLE_NAME, MALFORMED_SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE,
- DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT), null);
+ DAILY_PUSH_FREQUENCY, dateTimeFormatSpec, null);
Assert.fail();
} catch (IllegalArgumentException e) {
// Expected
}
try {
new NormalizedDateSegmentNameGenerator(TABLE_NAME, SEGMENT_NAME_PREFIX, false, APPEND_PUSH_TYPE,
- DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT),
- MALFORMED_SEGMENT_NAME_POSTFIX);
+ DAILY_PUSH_FREQUENCY, dateTimeFormatSpec, MALFORMED_SEGMENT_NAME_POSTFIX);
Assert.fail();
} catch (IllegalArgumentException e) {
// Expected
@@ -255,8 +248,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testMalFormedDateFormatAndTimeValue() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, DAILY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), SIMPLE_DATE_TIME_FORMAT, STRING_SLASH_DATE_FORMAT),
- null);
+ DateTimeFormatSpec.forSimpleDateFormat(STRING_SLASH_DATE_FORMAT), null);
assertEquals(segmentNameGenerator.toString(), "NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, "
+ "appendPushType=true, outputSDF=yyyy-MM-dd, inputSDF=yyyy/MM/dd");
assertEquals(segmentNameGenerator.generateSegmentName(INVALID_SEQUENCE_ID, "1970/01/02", "1970/01/04"),
@@ -270,7 +262,7 @@ public class NormalizedDateSegmentNameGeneratorTest {
public void testHourlyPushFrequency() {
SegmentNameGenerator segmentNameGenerator =
new NormalizedDateSegmentNameGenerator(TABLE_NAME, null, false, APPEND_PUSH_TYPE, HOURLY_PUSH_FREQUENCY,
- new DateTimeFormatSpec(1, TimeUnit.DAYS.toString(), EPOCH_TIME_FORMAT), null);
+ DateTimeFormatSpec.forEpoch(TimeUnit.DAYS.name()), null);
assertEquals(segmentNameGenerator.toString(),
"NormalizedDateSegmentNameGenerator: segmentNamePrefix=myTable, appendPushType=true, outputSDF=yyyy-MM-dd-HH,"
+ " inputTimeUnit=DAYS");
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
index 400cd7d433..ddb04f70ca 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFieldSpec.java
@@ -31,6 +31,8 @@ import org.apache.pinot.spi.utils.EqualityUtils;
public final class DateTimeFieldSpec extends FieldSpec {
private String _format;
private String _granularity;
+ private transient DateTimeFormatSpec _formatSpec;
+ private transient DateTimeGranularitySpec _granularitySpec;
public enum TimeFormat {
EPOCH, TIMESTAMP, SIMPLE_DATE_FORMAT
@@ -70,17 +72,11 @@ public final class DateTimeFieldSpec extends FieldSpec {
*/
public DateTimeFieldSpec(String name, DataType dataType, String format, String granularity) {
super(name, dataType, true);
- Preconditions.checkNotNull(name);
- Preconditions.checkNotNull(dataType);
- if (Character.isDigit(format.charAt(0))) {
- DateTimeFormatSpec.validateFormat(format);
- } else {
- DateTimeFormatSpec.validatePipeFormat(format);
- }
- DateTimeGranularitySpec.validateGranularity(granularity);
_format = format;
_granularity = granularity;
+ _formatSpec = new DateTimeFormatSpec(format);
+ _granularitySpec = new DateTimeGranularitySpec(granularity);
}
/**
@@ -116,6 +112,16 @@ public final class DateTimeFieldSpec extends FieldSpec {
_format = format;
}
+ @JsonIgnore
+ public DateTimeFormatSpec getFormatSpec() {
+ DateTimeFormatSpec formatSpec = _formatSpec;
+ if (formatSpec == null) {
+ formatSpec = new DateTimeFormatSpec(_format);
+ _formatSpec = formatSpec;
+ }
+ return formatSpec;
+ }
+
public String getGranularity() {
return _granularity;
}
@@ -125,6 +131,16 @@ public final class DateTimeFieldSpec extends FieldSpec {
_granularity = granularity;
}
+ @JsonIgnore
+ public DateTimeGranularitySpec getGranularitySpec() {
+ DateTimeGranularitySpec granularitySpec = _granularitySpec;
+ if (granularitySpec == null) {
+ granularitySpec = new DateTimeGranularitySpec(_granularity);
+ _granularitySpec = granularitySpec;
+ }
+ return granularitySpec;
+ }
+
@Override
public ObjectNode toJsonObject() {
ObjectNode jsonObject = super.toJsonObject();
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
index f237270e2a..a6d607d151 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpec.java
@@ -20,66 +20,95 @@ package org.apache.pinot.spi.data;
import com.google.common.base.Preconditions;
import java.util.Locale;
+import java.util.Objects;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
-import org.apache.pinot.spi.utils.EqualityUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
public class DateTimeFormatPatternSpec {
+ public static final DateTimeZone DEFAULT_DATE_TIME_ZONE = DateTimeZone.UTC;
+ public static final Locale DEFAULT_LOCALE = Locale.ENGLISH;
+
+ public static final DateTimeFormatPatternSpec EPOCH = new DateTimeFormatPatternSpec(TimeFormat.EPOCH);
+ public static final DateTimeFormatPatternSpec TIMESTAMP = new DateTimeFormatPatternSpec(TimeFormat.TIMESTAMP);
/** eg: yyyyMMdd tz(CST) or yyyyMMdd HH tz(GMT+0700) or yyyyMMddHH tz(America/Chicago) **/
private static final Pattern SDF_PATTERN_WITH_TIMEZONE = Pattern.compile("^(.+)( tz[ ]*\\((.+)\\))[ ]*");
private static final int SDF_PATTERN_GROUP = 1;
- private static final int TIMEZONE_GROUP = 3;
- public static final DateTimeZone DEFAULT_DATETIMEZONE = DateTimeZone.UTC;
- public static final Locale DEFAULT_LOCALE = Locale.ENGLISH;
+ private static final int TIME_ZONE_GROUP = 3;
- private final DateTimeFieldSpec.TimeFormat _timeFormat;
- private String _sdfPattern = null;
- private DateTimeZone _dateTimeZone = DEFAULT_DATETIMEZONE;
- private transient DateTimeFormatter _dateTimeFormatter;
+ private final TimeFormat _timeFormat;
+ private final String _sdfPattern;
+ private final DateTimeZone _dateTimeZone;
+ private final transient DateTimeFormatter _dateTimeFormatter;
- public DateTimeFormatPatternSpec(String timeFormat) {
+ public DateTimeFormatPatternSpec(TimeFormat timeFormat) {
this(timeFormat, null);
}
- public DateTimeFormatPatternSpec(String timeFormat, @Nullable String sdfPatternWithTz) {
- _timeFormat = DateTimeFieldSpec.TimeFormat.valueOf(timeFormat);
- if (_timeFormat.equals(DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT)) {
- Preconditions.checkNotNull(sdfPatternWithTz, String.format(
- "Must provide simple date format pattern with time format type: %s", timeFormat));
+ public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPatternWithTz) {
+ _timeFormat = timeFormat;
+ if (timeFormat == TimeFormat.SIMPLE_DATE_FORMAT) {
+ Preconditions.checkArgument(StringUtils.isNotEmpty(sdfPatternWithTz), "Must provide SIMPLE_DATE_FORMAT pattern");
Matcher m = SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz);
if (m.find()) {
_sdfPattern = m.group(SDF_PATTERN_GROUP).trim();
- String timezoneString = m.group(TIMEZONE_GROUP).trim();
- _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timezoneString));
+ String timeZone = m.group(TIME_ZONE_GROUP).trim();
+ try {
+ _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone));
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid time zone: " + timeZone);
+ }
} else {
_sdfPattern = sdfPatternWithTz;
+ _dateTimeZone = DEFAULT_DATE_TIME_ZONE;
+ }
+ try {
+ _dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid SIMPLE_DATE_FORMAT pattern: " + _sdfPattern);
}
- _dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
+ } else {
+ _sdfPattern = null;
+ _dateTimeZone = DEFAULT_DATE_TIME_ZONE;
+ _dateTimeFormatter = null;
}
}
- public DateTimeFormatPatternSpec(DateTimeFieldSpec.TimeFormat timeFormat, @Nullable String sdfPattern,
- @Nullable String timeZone) {
+ public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPattern, @Nullable String timeZone) {
_timeFormat = timeFormat;
- if (_timeFormat.equals(DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT)) {
+ if (_timeFormat == TimeFormat.SIMPLE_DATE_FORMAT) {
+ Preconditions.checkArgument(StringUtils.isNotEmpty(sdfPattern), "Must provide SIMPLE_DATE_FORMAT pattern");
+ _sdfPattern = sdfPattern;
if (timeZone != null) {
- _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone));
+ try {
+ _dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone));
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid time zone: " + timeZone);
+ }
+ } else {
+ _dateTimeZone = DEFAULT_DATE_TIME_ZONE;
}
- _dateTimeFormatter = DateTimeFormat.forPattern(sdfPattern).
- withZone(_dateTimeZone).
- withLocale(DEFAULT_LOCALE);
- _sdfPattern = sdfPattern;
+ try {
+ _dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Invalid SIMPLE_DATE_FORMAT pattern: " + _sdfPattern);
+ }
+ } else {
+ _sdfPattern = null;
+ _dateTimeZone = DEFAULT_DATE_TIME_ZONE;
+ _dateTimeFormatter = null;
}
}
- public DateTimeFieldSpec.TimeFormat getTimeFormat() {
+ public TimeFormat getTimeFormat() {
return _timeFormat;
}
@@ -95,54 +124,27 @@ public class DateTimeFormatPatternSpec {
return _dateTimeFormatter;
}
- /**
- * Validates the sdf pattern
- */
- public static void validateFormat(String sdfPatternWithTz) {
- try {
- String sdfPattern;
- Matcher m = SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz);
- if (m.find()) {
- sdfPattern = m.group(SDF_PATTERN_GROUP).trim();
- String timezoneString = m.group(TIMEZONE_GROUP).trim();
- DateTimeZone dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timezoneString));
- DateTimeFormat.forPattern(sdfPattern).withZone(dateTimeZone);
- } else {
- sdfPattern = sdfPatternWithTz;
- DateTimeFormat.forPattern(sdfPattern);
- }
- } catch (Exception e) {
- throw new IllegalStateException("Unsupported simple date format pattern or time zone: " + sdfPatternWithTz);
- }
- }
-
@Override
public boolean equals(Object o) {
- if (EqualityUtils.isSameReference(this, o)) {
+ if (this == o) {
return true;
}
-
- if (EqualityUtils.isNullOrNotSameClass(this, o)) {
+ if (o == null || getClass() != o.getClass()) {
return false;
}
-
DateTimeFormatPatternSpec that = (DateTimeFormatPatternSpec) o;
-
- return EqualityUtils.isEqual(_timeFormat, that._timeFormat) && EqualityUtils.isEqual(_sdfPattern, that._sdfPattern)
- && EqualityUtils.isEqual(_dateTimeZone, that._dateTimeZone);
+ return _timeFormat == that._timeFormat && Objects.equals(_sdfPattern, that._sdfPattern) && _dateTimeZone.equals(
+ that._dateTimeZone);
}
@Override
public int hashCode() {
- int result = EqualityUtils.hashCodeOf(_timeFormat);
- result = EqualityUtils.hashCodeOf(result, _sdfPattern);
- result = EqualityUtils.hashCodeOf(result, _dateTimeZone);
- return result;
+ return Objects.hash(_timeFormat, _sdfPattern, _dateTimeZone);
}
@Override
public String toString() {
return "DateTimeFormatPatternSpec{" + "_timeFormat=" + _timeFormat + ", _sdfPattern='" + _sdfPattern + '\''
- + ", _dateTimeZone=" + _dateTimeZone + ", _dateTimeFormatter=" + _dateTimeFormatter + '}';
+ + ", _dateTimeZone=" + _dateTimeZone + '}';
}
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
index 0a1de16214..c99c8a370e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatSpec.java
@@ -18,13 +18,14 @@
*/
package org.apache.pinot.spi.data;
-import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import java.sql.Timestamp;
+import java.util.Objects;
import java.util.concurrent.TimeUnit;
+import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat;
-import org.apache.pinot.spi.utils.EqualityUtils;
+import org.apache.pinot.spi.utils.StringUtil;
import org.apache.pinot.spi.utils.TimestampUtils;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
@@ -35,113 +36,201 @@ import org.joda.time.format.DateTimeFormatter;
*/
public class DateTimeFormatSpec {
- public static final String NUMBER_REGEX = "[1-9][0-9]*";
- public static final String COLON_SEPARATOR = ":";
- public static final String PIPE_SEPARATOR = "|";
-
- /* DateTimeFieldSpec format is of format size:timeUnit:timeformat:pattern tz(timezone)
- * tz(timezone) is optional. If not specified, UTC timezone is used */
- public static final int FORMAT_SIZE_POSITION = 0;
- public static final int FORMAT_UNIT_POSITION = 1;
- public static final int FORMAT_TIMEFORMAT_POSITION = 2;
- public static final int FORMAT_PATTERN_POSITION = 3;
- public static final int MIN_FORMAT_TOKENS = 3;
- public static final int MAX_FORMAT_TOKENS = 4;
-
- public static final int FORMAT_TIMEFORMAT_POSITION_PIPE = 0;
- public static final int MIN_FORMAT_TOKENS_PIPE = 1;
- public static final int MAX_FORMAT_TOKENS_PIPE = 3;
-
- // Applicable for SIMPLE_DATE_FORMAT|<timeFormat>(|<timezone>)
- public static final int SDF_PATTERN_POSITION = 1;
- public static final int SDF_TIMEZONE_POSITION = 2;
-
- // Applicable for EPOCH|<timeUnit>(|<size>)
- public static final int EPOCH_UNIT_POSITION = 1;
- public static final int EPOCH_SIZE_POSITION = 2;
-
- private final String _format;
+ // Colon format: 'size:timeUnit:timeFormat:pattern tz(timeZone)'
+ // 'pattern' applies to the 'SIMPLE_DATE_FORMAT' time format
+ // 'tz(timeZone)' is optional in the 'pattern'. If not specified, UTC timezone is used.
+ private static final char COLON_SEPARATOR = ':';
+ private static final int COLON_FORMAT_SIZE_POSITION = 0;
+ private static final int COLON_FORMAT_TIME_UNIT_POSITION = 1;
+ private static final int COLON_FORMAT_TIME_FORMAT_POSITION = 2;
+ private static final int COLON_FORMAT_PATTERN_POSITION = 3;
+ private static final int COLON_FORMAT_MIN_TOKENS = 3;
+ private static final int COLON_FORMAT_MAX_TOKENS = 4;
+
+ // Pipe format:
+ // - EPOCH|timeUnit(|size)
+ // - SIMPLE_DATE_FORMAT|pattern(|timeZone)
+ // - TIMESTAMP
+ private static final char PIPE_SEPARATOR = '|';
+ private static final int PIPE_FORMAT_TIME_FORMAT_POSITION = 0;
+ private static final int PIPE_FORMAT_TIME_UNIT_POSITION = 1;
+ private static final int PIPE_FORMAT_SIZE_POSITION = 2;
+ private static final int PIPE_FORMAT_PATTERN_POSITION = 1;
+ private static final int PIPE_FORMAT_TIME_ZONE_POSITION = 2;
+ private static final int PIPE_FORMAT_MIN_TOKENS = 1;
+ private static final int PIPE_FORMAT_MAX_TOKENS = 3;
+
+ private static final DateTimeFormatSpec TIMESTAMP =
+ new DateTimeFormatSpec(1, DateTimeFormatUnitSpec.MILLISECONDS, DateTimeFormatPatternSpec.TIMESTAMP);
+
+ // For EPOCH
private final int _size;
private final DateTimeFormatUnitSpec _unitSpec;
+ // For SIMPLE_DATE_FORMAT
private final DateTimeFormatPatternSpec _patternSpec;
public DateTimeFormatSpec(String format) {
- _format = format;
- if (Character.isDigit(_format.charAt(0))) {
- String[] formatTokens = validateFormat(format);
- if (formatTokens.length == MAX_FORMAT_TOKENS) {
- _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION],
- formatTokens[FORMAT_PATTERN_POSITION]);
- } else {
- _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION]);
+ Preconditions.checkArgument(StringUtils.isNotEmpty(format), "Must provide format");
+
+ if (Character.isDigit(format.charAt(0))) {
+ // Colon format
+
+ String[] tokens = StringUtil.split(format, COLON_SEPARATOR, COLON_FORMAT_MAX_TOKENS);
+ Preconditions.checkArgument(tokens.length >= COLON_FORMAT_MIN_TOKENS && tokens.length <= COLON_FORMAT_MAX_TOKENS,
+ "Invalid format: %s, must be of format 'size:timeUnit:timeFormat(:patternWithTz)'", format);
+
+ TimeFormat timeFormat;
+ try {
+ timeFormat = TimeFormat.valueOf(tokens[COLON_FORMAT_TIME_FORMAT_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid time format: %s in format: %s", tokens[COLON_FORMAT_TIME_FORMAT_POSITION], format));
}
- if (_patternSpec.getTimeFormat() == TimeFormat.TIMESTAMP) {
- // TIMESTAMP type stores millis since epoch
- _size = 1;
- _unitSpec = new DateTimeFormatUnitSpec("MILLISECONDS");
- } else {
- _size = Integer.parseInt(formatTokens[FORMAT_SIZE_POSITION]);
- _unitSpec = new DateTimeFormatUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
+
+ switch (timeFormat) {
+ case EPOCH:
+ String sizeStr = tokens[COLON_FORMAT_SIZE_POSITION];
+ try {
+ _size = Integer.parseInt(sizeStr);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(String.format("Invalid size: %s in format: %s", sizeStr, format));
+ }
+ Preconditions.checkArgument(_size > 0, "Invalid size: %s in format: %s, must be positive", _size, format);
+ String timeUnitStr = tokens[COLON_FORMAT_TIME_UNIT_POSITION];
+ try {
+ _unitSpec = new DateTimeFormatUnitSpec(timeUnitStr);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid time unit: %s in format: %s", timeUnitStr, format));
+ }
+ _patternSpec = DateTimeFormatPatternSpec.EPOCH;
+ break;
+ case TIMESTAMP:
+ _size = 1;
+ _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS;
+ _patternSpec = DateTimeFormatPatternSpec.TIMESTAMP;
+ break;
+ case SIMPLE_DATE_FORMAT:
+ _size = 1;
+ _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS;
+ Preconditions.checkArgument(tokens.length == COLON_FORMAT_MAX_TOKENS,
+ "Invalid SIMPLE_DATE_FORMAT format: %s, must be of format "
+ + "'<size>:<timeUnit>:SIMPLE_DATE_FORMAT:<patternWithTz>'", format);
+ String patternStr = tokens[COLON_FORMAT_PATTERN_POSITION];
+ try {
+ _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, patternStr);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s in format: %s", patternStr, format));
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported time format: " + timeFormat);
}
} else {
- String[] formatTokens = validatePipeFormat(format);
- if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.EPOCH.toString())) {
- _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
- _unitSpec = new DateTimeFormatUnitSpec(formatTokens[EPOCH_UNIT_POSITION]);
- if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
- _size = Integer.parseInt(formatTokens[EPOCH_SIZE_POSITION]);
- } else {
+ // Pipe format
+
+ String[] tokens = StringUtil.split(format, PIPE_SEPARATOR, PIPE_FORMAT_MAX_TOKENS);
+ Preconditions.checkArgument(tokens.length >= PIPE_FORMAT_MIN_TOKENS && tokens.length <= PIPE_FORMAT_MAX_TOKENS,
+ "Invalid format: %s, must be of format 'EPOCH|<timeUnit>(|<size>)' or "
+ + "'SIMPLE_DATE_FORMAT|<pattern>(|<timeZone>)' or 'TIMESTAMP'", format);
+
+ TimeFormat timeFormat;
+ try {
+ timeFormat = TimeFormat.valueOf(tokens[PIPE_FORMAT_TIME_FORMAT_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid time format: %s in format: %s", tokens[PIPE_FORMAT_TIME_FORMAT_POSITION], format));
+ }
+
+ switch (timeFormat) {
+ case EPOCH:
+ if (tokens.length > PIPE_FORMAT_SIZE_POSITION) {
+ try {
+ _size = Integer.parseInt(tokens[PIPE_FORMAT_SIZE_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid size: %s in format: %s", tokens[PIPE_FORMAT_SIZE_POSITION], format));
+ }
+ Preconditions.checkArgument(_size > 0, "Invalid size: %s in format: %s, must be positive", _size, format);
+ } else {
+ _size = 1;
+ }
+ Preconditions.checkArgument(tokens.length > PIPE_FORMAT_TIME_UNIT_POSITION,
+ "Invalid EPOCH format: %s, must be of format 'EPOCH|<timeUnit>(|<size>)'", format);
+ try {
+ _unitSpec = new DateTimeFormatUnitSpec(tokens[PIPE_FORMAT_TIME_UNIT_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid time unit: %s in format: %s", tokens[PIPE_FORMAT_TIME_UNIT_POSITION], format));
+ }
+ _patternSpec = DateTimeFormatPatternSpec.EPOCH;
+ break;
+ case TIMESTAMP:
_size = 1;
- }
- } else if (formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString())) {
- if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE) {
- _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf(
- formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]),
- formatTokens[SDF_PATTERN_POSITION],
- formatTokens[SDF_TIMEZONE_POSITION]);
- } else {
- _patternSpec = new DateTimeFormatPatternSpec(TimeFormat.valueOf(
- formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]),
- formatTokens[SDF_PATTERN_POSITION], null);
- }
- _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.DAYS.toString());
- _size = 1;
- } else {
- // Applicable for TIMESTAMP format
- _patternSpec = new DateTimeFormatPatternSpec(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
- _unitSpec = new DateTimeFormatUnitSpec(TimeUnit.MILLISECONDS.toString());
- _size = 1;
+ _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS;
+ _patternSpec = DateTimeFormatPatternSpec.TIMESTAMP;
+ break;
+ case SIMPLE_DATE_FORMAT:
+ _size = 1;
+ _unitSpec = DateTimeFormatUnitSpec.MILLISECONDS;
+ Preconditions.checkArgument(tokens.length > PIPE_FORMAT_PATTERN_POSITION,
+ "Invalid SIMPLE_DATE_FORMAT format: %s, must be of format 'SIMPLE_DATE_FORMAT|<pattern>(|<timeZone>)'",
+ format);
+ if (tokens.length > PIPE_FORMAT_TIME_ZONE_POSITION) {
+ try {
+ _patternSpec =
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, tokens[PIPE_FORMAT_PATTERN_POSITION],
+ tokens[PIPE_FORMAT_TIME_ZONE_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s, time zone: %s in format: %s",
+ tokens[PIPE_FORMAT_PATTERN_POSITION], tokens[PIPE_FORMAT_TIME_ZONE_POSITION], format));
+ }
+ } else {
+ try {
+ _patternSpec =
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, tokens[PIPE_FORMAT_PATTERN_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s in format: %s",
+ tokens[PIPE_FORMAT_PATTERN_POSITION], format));
+ }
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unsupported time format: " + timeFormat);
}
}
}
- /**
- * Constructs a dateTimeSpec format, given the components of a format
- */
- public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat) {
- _format = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit, columnTimeFormat);
- validateFormat(_format);
+ private DateTimeFormatSpec(int size, DateTimeFormatUnitSpec unitSpec, DateTimeFormatPatternSpec patternSpec) {
+ _size = size;
+ _unitSpec = unitSpec;
+ _patternSpec = patternSpec;
+ }
- _size = columnSize;
- _unitSpec = new DateTimeFormatUnitSpec(columnUnit);
- _patternSpec = new DateTimeFormatPatternSpec(columnTimeFormat);
+ public static DateTimeFormatSpec forTimestamp() {
+ return TIMESTAMP;
}
- /**
- * Constructs a dateTimeSpec format, given the components of a format
- * @param sdfPattern and tz
- */
- public DateTimeFormatSpec(int columnSize, String columnUnit, String columnTimeFormat, String sdfPattern) {
- _format = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit, columnTimeFormat, sdfPattern);
- validateFormat(_format);
+ public static DateTimeFormatSpec forEpoch(String timeUnit) {
+ return forEpoch(1, timeUnit);
+ }
+
+ public static DateTimeFormatSpec forEpoch(int size, String timeUnit) {
+ Preconditions.checkArgument(size > 0, "Invalid size: %s, must be positive", size);
+ Preconditions.checkArgument(timeUnit != null, "Must provide time unit");
+ return new DateTimeFormatSpec(size, new DateTimeFormatUnitSpec(timeUnit), DateTimeFormatPatternSpec.EPOCH);
+ }
- _size = columnSize;
- _unitSpec = new DateTimeFormatUnitSpec(columnUnit);
- _patternSpec = new DateTimeFormatPatternSpec(columnTimeFormat, sdfPattern);
+ public static DateTimeFormatSpec forSimpleDateFormat(String patternWithTz) {
+ return new DateTimeFormatSpec(1, DateTimeFormatUnitSpec.MILLISECONDS,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, patternWithTz));
}
- public String getFormat() {
- return _format;
+ public static DateTimeFormatSpec forSimpleDateFormat(String pattern, @Nullable String timeZone) {
+ return new DateTimeFormatSpec(1, DateTimeFormatUnitSpec.MILLISECONDS,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, pattern, timeZone));
}
public int getColumnSize() {
@@ -217,91 +306,26 @@ public class DateTimeFormatSpec {
}
}
- /**
- * Validates the format string in the dateTimeFieldSpec
- */
- public static String[] validateFormat(String format) {
- Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
- String[] formatTokens = StringUtils.split(format, COLON_SEPARATOR, MAX_FORMAT_TOKENS);
- Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS && formatTokens.length <= MAX_FORMAT_TOKENS,
- "Incorrect format: %s. Must be of format 'size:timeunit:timeformat(:pattern)'", format);
- Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].matches(NUMBER_REGEX),
- "Incorrect format size: %s in format: %s. Must be of format '[0-9]+:<TimeUnit>:<TimeFormat>(:pattern)'",
- formatTokens[FORMAT_SIZE_POSITION], format);
-
- DateTimeFormatUnitSpec.validateUnitSpec(formatTokens[FORMAT_UNIT_POSITION]);
-
- if (formatTokens.length == MIN_FORMAT_TOKENS) {
- Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.EPOCH.toString())
- || formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.TIMESTAMP.toString()),
- "Incorrect format type: %s in format: %s. Must be of '[0-9]+:<TimeUnit>:EPOCH|TIMESTAMP'",
- formatTokens[FORMAT_TIMEFORMAT_POSITION], format);
- } else {
- Preconditions
- .checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()),
- "Incorrect format type: %s in format: %s. Must be of '[0-9]+:<TimeUnit>:SIMPLE_DATE_FORMAT:pattern'",
- formatTokens[FORMAT_TIMEFORMAT_POSITION], format);
- DateTimeFormatPatternSpec.validateFormat(formatTokens[FORMAT_PATTERN_POSITION]);
- }
- return formatTokens;
- }
-
- /**
- * Validates the pipe format string in the dateTimeFieldSpec
- */
- public static String[] validatePipeFormat(String format) {
- Preconditions.checkNotNull(format, "Format string in dateTimeFieldSpec must not be null");
- String[] formatTokens = StringUtils.split(format, PIPE_SEPARATOR, MAX_FORMAT_TOKENS_PIPE);
- Preconditions.checkState(formatTokens.length >= MIN_FORMAT_TOKENS_PIPE
- && formatTokens.length <= MAX_FORMAT_TOKENS_PIPE,
- "Incorrect format: %s. Must be of the format 'EPOCH|<timeUnit>(|<size>)'"
- + " or 'SDF|<timeFormat>(|<timezone>)' or 'TIMESTAMP'");
- if (formatTokens.length == MIN_FORMAT_TOKENS_PIPE) {
- Preconditions.checkState(formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE].equals(TimeFormat.TIMESTAMP.toString()),
- "Incorrect format type: %s. Must be of TIMESTAMP", formatTokens[FORMAT_TIMEFORMAT_POSITION_PIPE]);
- } else {
- Preconditions.checkState(formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.EPOCH.toString())
- || formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.SIMPLE_DATE_FORMAT.toString()),
- "Incorrect format %s. Must be of 'EPOCH|<timeUnit>(|<size>)' or" + "'SDF|<timeFormat>(|<timezone>)'");
-
- if (formatTokens.length == MAX_FORMAT_TOKENS_PIPE
- && formatTokens[FORMAT_SIZE_POSITION].equals(TimeFormat.EPOCH.toString())) {
- Preconditions.checkState(formatTokens[EPOCH_SIZE_POSITION].matches(NUMBER_REGEX),
- "Incorrect format size: %s in format: %s. Must be of format 'EPOCH|<timeUnit>|[0-9]+'",
- formatTokens[EPOCH_SIZE_POSITION], format);
- }
- }
- return formatTokens;
- }
-
@Override
public boolean equals(Object o) {
- if (EqualityUtils.isSameReference(this, o)) {
+ if (this == o) {
return true;
}
-
- if (EqualityUtils.isNullOrNotSameClass(this, o)) {
+ if (o == null || getClass() != o.getClass()) {
return false;
}
-
DateTimeFormatSpec that = (DateTimeFormatSpec) o;
-
- return EqualityUtils.isEqual(_size, that._size) && EqualityUtils.isEqual(_format, that._format) && EqualityUtils
- .isEqual(_unitSpec, that._unitSpec) && EqualityUtils.isEqual(_patternSpec, that._patternSpec);
+ return _size == that._size && _unitSpec.equals(that._unitSpec) && _patternSpec.equals(that._patternSpec);
}
@Override
public int hashCode() {
- int result = EqualityUtils.hashCodeOf(_format);
- result = EqualityUtils.hashCodeOf(result, _size);
- result = EqualityUtils.hashCodeOf(result, _unitSpec);
- result = EqualityUtils.hashCodeOf(result, _patternSpec);
- return result;
+ return Objects.hash(_size, _unitSpec, _patternSpec);
}
@Override
public String toString() {
- return "DateTimeFormatSpec{" + "_format='" + _format + '\'' + ", _size=" + _size + ", _unitSpec=" + _unitSpec
- + ", _patternSpec=" + _patternSpec + '}';
+ return "DateTimeFormatSpec{" + "_size=" + _size + ", _unitSpec=" + _unitSpec + ", _patternSpec=" + _patternSpec
+ + '}';
}
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java
index 56c11e057f..9052f2abc7 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeFormatUnitSpec.java
@@ -19,9 +19,9 @@
package org.apache.pinot.spi.data;
import com.google.common.base.Preconditions;
+import java.util.Objects;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.EnumUtils;
-import org.apache.pinot.spi.utils.EqualityUtils;
import org.joda.time.DurationFieldType;
import org.joda.time.chrono.ISOChronology;
@@ -98,17 +98,24 @@ public class DateTimeFormatUnitSpec {
public abstract long fromMillis(long millisSinceEpoch);
}
- private TimeUnit _timeUnit = null;
- private DateTimeTransformUnit _dateTimeTransformUnit = null;
+ public static final DateTimeFormatUnitSpec MILLISECONDS = new DateTimeFormatUnitSpec(TimeUnit.MILLISECONDS.name());
+
+ private final TimeUnit _timeUnit;
+ private final DateTimeTransformUnit _dateTimeTransformUnit;
public DateTimeFormatUnitSpec(String unit) {
- validateUnitSpec(unit);
if (EnumUtils.isValidEnum(TimeUnit.class, unit)) {
_timeUnit = TimeUnit.valueOf(unit);
+ } else {
+ _timeUnit = null;
}
if (EnumUtils.isValidEnum(DateTimeTransformUnit.class, unit)) {
_dateTimeTransformUnit = DateTimeTransformUnit.valueOf(unit);
+ } else {
+ _dateTimeTransformUnit = null;
}
+ Preconditions.checkArgument(_timeUnit != null || _dateTimeTransformUnit != null,
+ "Unit must belong to enum TimeUnit or DateTimeTransformUnit, got: %s", unit);
}
public TimeUnit getTimeUnit() {
@@ -119,32 +126,20 @@ public class DateTimeFormatUnitSpec {
return _dateTimeTransformUnit;
}
- public static void validateUnitSpec(String unit) {
- Preconditions.checkState(
- EnumUtils.isValidEnum(TimeUnit.class, unit) || EnumUtils.isValidEnum(DateTimeTransformUnit.class, unit),
- "Unit: %s must belong to enum TimeUnit or DateTimeTransformUnit", unit);
- }
-
@Override
public boolean equals(Object o) {
- if (EqualityUtils.isSameReference(this, o)) {
+ if (this == o) {
return true;
}
-
- if (EqualityUtils.isNullOrNotSameClass(this, o)) {
+ if (o == null || getClass() != o.getClass()) {
return false;
}
-
DateTimeFormatUnitSpec that = (DateTimeFormatUnitSpec) o;
-
- return EqualityUtils.isEqual(_timeUnit, that._timeUnit) && EqualityUtils
- .isEqual(_dateTimeTransformUnit, that._dateTimeTransformUnit);
+ return _timeUnit == that._timeUnit && _dateTimeTransformUnit == that._dateTimeTransformUnit;
}
@Override
public int hashCode() {
- int result = EqualityUtils.hashCodeOf(_timeUnit);
- result = EqualityUtils.hashCodeOf(result, _dateTimeTransformUnit);
- return result;
+ return Objects.hash(_timeUnit, _dateTimeTransformUnit);
}
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java
index a4077d3a26..c8cd10eb15 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DateTimeGranularitySpec.java
@@ -18,28 +18,23 @@
*/
package org.apache.pinot.spi.data;
-import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
+import java.util.Objects;
import java.util.concurrent.TimeUnit;
-import org.apache.commons.lang3.EnumUtils;
-import org.apache.pinot.spi.utils.EqualityUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.spi.utils.StringUtil;
/**
* Class to represent granularity from {@link DateTimeFieldSpec}
*/
public class DateTimeGranularitySpec {
+ // 'size:timeUnit'
+ private static final char SEPARATOR = ':';
+ private static final int SIZE_POSITION = 0;
+ private static final int TIME_UNIT_POSITION = 1;
+ private static final int NUM_TOKENS = 2;
- public static final String NUMBER_REGEX = "[1-9][0-9]*";
-
- public static final String COLON_SEPARATOR = ":";
-
- /* DateTimeFieldSpec granularity is of format size:timeUnit */
- public static final int GRANULARITY_SIZE_POSITION = 0;
- public static final int GRANULARITY_UNIT_POSITION = 1;
- public static final int MAX_GRANULARITY_TOKENS = 2;
-
- private final String _granularity;
private final int _size;
private final TimeUnit _timeUnit;
@@ -47,25 +42,34 @@ public class DateTimeGranularitySpec {
* Constructs a dateTimeGranularitySpec granularity from a string
*/
public DateTimeGranularitySpec(String granularity) {
- validateGranularity(granularity);
- _granularity = granularity;
- String[] granularityTokens = _granularity.split(COLON_SEPARATOR);
- _size = Integer.parseInt(granularityTokens[GRANULARITY_SIZE_POSITION]);
- _timeUnit = TimeUnit.valueOf(granularityTokens[GRANULARITY_UNIT_POSITION]);
+ Preconditions.checkArgument(StringUtils.isNotEmpty(granularity), "Must provide granularity");
+ String[] granularityTokens = StringUtil.split(granularity, SEPARATOR, 2);
+ Preconditions.checkArgument(granularityTokens.length >= NUM_TOKENS,
+ "Invalid granularity: %s, must be of format 'size:timeUnit'", granularity);
+ try {
+ _size = Integer.parseInt(granularityTokens[SIZE_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid size: %s in granularity: %s", granularityTokens[SIZE_POSITION], granularity));
+ }
+ Preconditions.checkArgument(_size > 0, "Invalid size: %s in granularity: %s, must be positive", _size, granularity);
+ try {
+ _timeUnit = TimeUnit.valueOf(granularityTokens[TIME_UNIT_POSITION]);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(
+ String.format("Invalid time unit: %s in granularity: %s", granularityTokens[TIME_UNIT_POSITION],
+ granularity));
+ }
}
/**
* Constructs a dateTimeGranularitySpec granularity given the components of a granularity
*/
- public DateTimeGranularitySpec(int columnSize, TimeUnit columnUnit) {
- _granularity = Joiner.on(COLON_SEPARATOR).join(columnSize, columnUnit);
- validateGranularity(_granularity);
- _size = columnSize;
- _timeUnit = columnUnit;
- }
-
- public String getGranularity() {
- return _granularity;
+ public DateTimeGranularitySpec(int size, TimeUnit timeUnit) {
+ Preconditions.checkArgument(size > 0, "Invalid size: %s, must be positive", size);
+ Preconditions.checkArgument(timeUnit != null, "Must provide time unit");
+ _size = size;
+ _timeUnit = timeUnit;
}
public int getSize() {
@@ -77,55 +81,31 @@ public class DateTimeGranularitySpec {
}
/**
+ * Converts a granularity to millis.
* <ul>
- * <li>Convert a granularity to millis.
- * This method should not do validation of outputGranularity.
- * The validation should be handled by caller using {@link #validateGranularity}</li>
- * <ul>
- * <li>1) granularityToMillis(1:HOURS) = 3600000 (60*60*1000)</li>
- * <li>2) granularityToMillis(1:MILLISECONDS) = 1</li>
- * <li>3) granularityToMillis(15:MINUTES) = 900000 (15*60*1000)</li>
- * </ul>
+ * <li>1) granularityToMillis(1:HOURS) = 3600000 (60*60*1000)</li>
+ * <li>2) granularityToMillis(1:MILLISECONDS) = 1</li>
+ * <li>3) granularityToMillis(15:MINUTES) = 900000 (15*60*1000)</li>
* </ul>
*/
public long granularityToMillis() {
return TimeUnit.MILLISECONDS.convert(_size, _timeUnit);
}
- /**
- * Check correctness of granularity of {@link DateTimeFieldSpec}
- */
- public static void validateGranularity(String granularity) {
- Preconditions.checkNotNull(granularity, "Granularity string in dateTimeFieldSpec must not be null");
-
- String[] granularityTokens = granularity.split(COLON_SEPARATOR);
- Preconditions.checkState(granularityTokens.length == MAX_GRANULARITY_TOKENS,
- "Incorrect granularity: %s. Must be of format 'size:timeunit'", granularity);
- Preconditions.checkState(granularityTokens[GRANULARITY_SIZE_POSITION].matches(NUMBER_REGEX),
- "Incorrect granularity size: %s. Must be of format '[0-9]+:<TimeUnit>'",
- granularityTokens[GRANULARITY_SIZE_POSITION]);
- Preconditions.checkState(EnumUtils.isValidEnum(TimeUnit.class, granularityTokens[GRANULARITY_UNIT_POSITION]),
- "Incorrect granularity size: %s. Must be of format '[0-9]+:<TimeUnit>'",
- granularityTokens[GRANULARITY_SIZE_POSITION]);
- }
-
@Override
public boolean equals(Object o) {
- if (EqualityUtils.isSameReference(this, o)) {
+ if (this == o) {
return true;
}
-
- if (EqualityUtils.isNullOrNotSameClass(this, o)) {
+ if (o == null || getClass() != o.getClass()) {
return false;
}
-
DateTimeGranularitySpec that = (DateTimeGranularitySpec) o;
-
- return EqualityUtils.isEqual(_granularity, that._granularity);
+ return _size == that._size && _timeUnit == that._timeUnit;
}
@Override
public int hashCode() {
- return EqualityUtils.hashCodeOf(_granularity);
+ return Objects.hash(_size, _timeUnit);
}
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
index 0670f76e27..9c5a3cc42b 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
@@ -37,12 +37,12 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
-import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.FieldSpec.FieldType;
import org.apache.pinot.spi.utils.EqualityUtils;
import org.apache.pinot.spi.utils.JsonUtils;
+import org.apache.pinot.spi.utils.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -751,16 +751,26 @@ public final class Schema implements Serializable {
int outgoingTimeSize = outgoingGranularitySpec.getTimeUnitSize();
TimeUnit outgoingTimeUnit = outgoingGranularitySpec.getTimeType();
String outgoingTimeFormat = outgoingGranularitySpec.getTimeFormat();
- String[] split = StringUtils.split(outgoingTimeFormat, DateTimeFormatSpec.COLON_SEPARATOR, 2);
- DateTimeFormatSpec formatSpec;
- if (split[0].equals(DateTimeFieldSpec.TimeFormat.EPOCH.toString())) {
- formatSpec = new DateTimeFormatSpec(outgoingTimeSize, outgoingTimeUnit.toString(), split[0]);
+ String[] split = StringUtil.split(outgoingTimeFormat, ':', 2);
+ String timeFormat;
+ if (split[0].equals(DateTimeFieldSpec.TimeFormat.EPOCH.name())) {
+ timeFormat = outgoingTimeSize + ":" + outgoingTimeUnit.name() + ":EPOCH";
} else {
- formatSpec = new DateTimeFormatSpec(outgoingTimeSize, outgoingTimeUnit.toString(), split[0], split[1]);
- }
- dateTimeFieldSpec.setFormat(formatSpec.getFormat());
+ timeFormat = outgoingTimeSize + ":" + outgoingTimeUnit.name() + ":SIMPLE_DATE_FORMAT:" + split[1];
+ }
+ // TODO: Switch to new format after releasing 0.11.0
+// if (split[0].equals(DateTimeFieldSpec.TimeFormat.EPOCH.name())) {
+// timeFormat = "EPOCH|" + outgoingTimeUnit.name();
+// if (outgoingTimeSize != 1) {
+// timeFormat += "|" + outgoingTimeSize;
+// }
+// timeFormat = outgoingTimeSize + ":" + outgoingTimeUnit.name() + ":EPOCH";
+// } else {
+// timeFormat = "SIMPLE_DATE_FORMAT|" + split[1];
+// }
+ dateTimeFieldSpec.setFormat(timeFormat);
DateTimeGranularitySpec granularitySpec = new DateTimeGranularitySpec(outgoingTimeSize, outgoingTimeUnit);
- dateTimeFieldSpec.setGranularity(granularitySpec.getGranularity());
+ dateTimeFieldSpec.setGranularity(outgoingTimeSize + ":" + outgoingTimeUnit.name());
if (timeFieldSpec.getTransformFunction() != null) {
dateTimeFieldSpec.setTransformFunction(timeFieldSpec.getTransformFunction());
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index 13b638915d..d236bd76f0 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -55,8 +55,6 @@ import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
-import org.apache.pinot.spi.data.DateTimeGranularitySpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
@@ -598,9 +596,11 @@ public class JsonUtils {
case DATE_TIME:
Preconditions.checkState(isSingleValueField, "Time field: %s cannot be multi-valued", name);
Preconditions.checkNotNull(timeUnit, "Time unit cannot be null");
- pinotSchema.addField(new DateTimeFieldSpec(name, dataType,
- new DateTimeFormatSpec(1, timeUnit.toString(), DateTimeFieldSpec.TimeFormat.EPOCH.toString()).getFormat(),
- new DateTimeGranularitySpec(1, timeUnit).getGranularity()));
+ // TODO: Switch to new format after releasing 0.11.0
+ // "EPOCH|" + timeUnit.name()
+ String format = "1:" + timeUnit.name() + ":EPOCH";
+ String granularity = "1:" + timeUnit.name();
+ pinotSchema.addField(new DateTimeFieldSpec(name, dataType, format, granularity));
break;
default:
throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + name);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java
index 6939a50b13..d7d2d25875 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/StringUtil.java
@@ -18,6 +18,9 @@
*/
package org.apache.pinot.spi.utils;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
@@ -35,6 +38,35 @@ public class StringUtil {
return StringUtils.join(keys, separator);
}
+ /**
+ * Splits the given string with the separator, returns an array with the given max length. When max <= 0, no limit is
+ * applied.
+ */
+ public static String[] split(String str, char separator, int max) {
+ int length = str.length();
+ if (length == 0) {
+ return ArrayUtils.EMPTY_STRING_ARRAY;
+ }
+ if (max == 1) {
+ return new String[]{str};
+ }
+ List<String> list = new ArrayList<>(max);
+ int start = 0;
+ int end = 0;
+ while (end < length) {
+ if (str.charAt(end) == separator) {
+ list.add(str.substring(start, end));
+ start = end + 1;
+ if (list.size() == max - 1) {
+ break;
+ }
+ }
+ end++;
+ }
+ list.add(str.substring(start, length));
+ return list.toArray(new String[0]);
+ }
+
/**
* Sanitizes a string value.
* <ul>
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java
index 3a730f2fc2..7d721c5290 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatPatternSpecTest.java
@@ -18,26 +18,72 @@
*/
package org.apache.pinot.spi.data;
+import java.util.TimeZone;
+import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat;
+import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertThrows;
+
public class DateTimeFormatPatternSpecTest {
@Test
public void testValidateFormat() {
- DateTimeFormatPatternSpec.validateFormat("yyyy-MM-dd");
- DateTimeFormatPatternSpec.validateFormat("yyyyMMdd tz(CST)");
- DateTimeFormatPatternSpec.validateFormat("yyyyMMdd HH tz(GMT+0700)");
- DateTimeFormatPatternSpec.validateFormat("yyyyMMddHH tz(America/Chicago)");
-
- // Unknown tz is treated as UTC
- DateTimeFormatPatternSpec.validateFormat("yyyyMMdd tz(CSEMT)");
- DateTimeFormatPatternSpec.validateFormat("yyyyMMdd tz(GMT+5000)");
- DateTimeFormatPatternSpec.validateFormat("yyyyMMddHH tz(HAHA/Chicago)");
-
- // invalid chars will throw
- assertThrows(IllegalStateException.class, () -> DateTimeFormatPatternSpec.validateFormat("yyyc-MM-dd"));
- assertThrows(IllegalStateException.class, () -> DateTimeFormatPatternSpec.validateFormat("yyyy-MM-dd ff(a)"));
+ DateTimeFormatPatternSpec dateTimeFormatPatternSpec =
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyy-MM-dd");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyy-MM-dd");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC);
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyy-MM-dd", null));
+
+ dateTimeFormatPatternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(CST)");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.forTimeZone(TimeZone.getTimeZone("CST")));
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "CST"));
+
+ dateTimeFormatPatternSpec =
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH tz(GMT+0700)");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd HH");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(),
+ DateTimeZone.forTimeZone(TimeZone.getTimeZone("GMT+0700")));
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd HH", "GMT+0700"));
+
+ dateTimeFormatPatternSpec =
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMddHH tz(America/Chicago)");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMddHH");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(),
+ DateTimeZone.forTimeZone(TimeZone.getTimeZone("America/Chicago")));
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMddHH", "America/Chicago"));
+
+ // Unknown time zone is treated as UTC
+ dateTimeFormatPatternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(CSEMT)");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC);
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "CSEMT"));
+
+ dateTimeFormatPatternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(GMT+5000)");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC);
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "GMT+5000"));
+
+ dateTimeFormatPatternSpec =
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd tz(HAHA/Chicago)");
+ assertEquals(dateTimeFormatPatternSpec.getSdfPattern(), "yyyyMMdd");
+ assertEquals(dateTimeFormatPatternSpec.getDateTimeZone(), DateTimeZone.UTC);
+ assertEquals(dateTimeFormatPatternSpec,
+ new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyyMMdd", "HAHA/Chicago"));
+
+ // Invalid pattern
+ assertThrows(IllegalArgumentException.class,
+ () -> new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyc-MM-dd"));
+ assertThrows(IllegalArgumentException.class,
+ () -> new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, "yyyy-MM-dd ff(a)"));
}
}
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java
index b7feae902e..8922568884 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/DateTimeFormatSpecTest.java
@@ -18,24 +18,75 @@
*/
package org.apache.pinot.spi.data;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertThrows;
public class DateTimeFormatSpecTest {
@Test
- public void testValidateFormat() {
- DateTimeFormatSpec.validateFormat("1:DAYS:EPOCH");
- DateTimeFormatSpec.validateFormat("1:DAYS:TIMESTAMP");
- DateTimeFormatSpec.validateFormat("1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd");
- assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY"));
- assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("one:DAYS:EPOCH"));
- assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY:EPOCH"));
- assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY:EPOCH:yyyyMMdd"));
- assertThrows(IllegalStateException.class, () -> DateTimeFormatSpec.validateFormat("1:DAY:TIMESTAMP:yyyyMMdd"));
- assertThrows(IllegalStateException.class,
- () -> DateTimeFormatSpec.validateFormat("1:DAY:SIMPLE_DATE_FORMAT:yyycMMdd"));
+ public void testDateTimeFormatSpec() {
+ DateTimeFormatSpec dateTimeFormatSpec = new DateTimeFormatSpec("5:DAYS:EPOCH");
+ assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.EPOCH);
+ assertEquals(dateTimeFormatSpec.getColumnSize(), 5);
+ assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.DAYS);
+ assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(),
+ DateTimeFormatUnitSpec.DateTimeTransformUnit.DAYS);
+ assertNull(dateTimeFormatSpec.getSDFPattern());
+
+ assertEquals(new DateTimeFormatSpec("EPOCH|DAYS|5"), dateTimeFormatSpec);
+
+ dateTimeFormatSpec = new DateTimeFormatSpec("1:DAYS:TIMESTAMP");
+ assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.TIMESTAMP);
+ assertEquals(dateTimeFormatSpec.getColumnSize(), 1);
+ assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.MILLISECONDS);
+ assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(),
+ DateTimeFormatUnitSpec.DateTimeTransformUnit.MILLISECONDS);
+ assertNull(dateTimeFormatSpec.getSDFPattern());
+
+ assertEquals(new DateTimeFormatSpec("TIMESTAMP"), dateTimeFormatSpec);
+
+ dateTimeFormatSpec = new DateTimeFormatSpec("1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd");
+ assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT);
+ assertEquals(dateTimeFormatSpec.getColumnSize(), 1);
+ assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.MILLISECONDS);
+ assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(),
+ DateTimeFormatUnitSpec.DateTimeTransformUnit.MILLISECONDS);
+ assertEquals(dateTimeFormatSpec.getSDFPattern(), "yyyyMMdd");
+ assertEquals(dateTimeFormatSpec.getDateTimezone(), DateTimeZone.UTC);
+
+ assertEquals(new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyyMMdd"), dateTimeFormatSpec);
+
+ dateTimeFormatSpec = new DateTimeFormatSpec("1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd tz(CST)");
+ assertEquals(dateTimeFormatSpec.getTimeFormat(), DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT);
+ assertEquals(dateTimeFormatSpec.getColumnSize(), 1);
+ assertEquals(dateTimeFormatSpec.getColumnUnit(), TimeUnit.MILLISECONDS);
+ assertEquals(dateTimeFormatSpec.getColumnDateTimeTransformUnit(),
+ DateTimeFormatUnitSpec.DateTimeTransformUnit.MILLISECONDS);
+ assertEquals(dateTimeFormatSpec.getSDFPattern(), "yyyy-MM-dd");
+ assertEquals(dateTimeFormatSpec.getDateTimezone(), DateTimeZone.forTimeZone(TimeZone.getTimeZone("CST")));
+
+ assertEquals(new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyy-MM-dd|CST"), dateTimeFormatSpec);
+
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY"));
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH"));
+
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("one:DAYS:EPOCH"));
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|DAYS|one"));
+
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY:EPOCH"));
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|DAY"));
+
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY:EPOCH:yyyyMMdd"));
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|yyyyMMdd"));
+
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY:SIMPLE_DATE_FORMAT:yyycMMdd"));
+ assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyycMMdd"));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org