You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2021/11/06 00:39:09 UTC

[pinot] branch master updated: to handle datetime column consistently (#7645)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 7cbd5ac  to handle datetime column consistently (#7645)
7cbd5ac is described below

commit 7cbd5ac21dc0c5c3c4ea007edb1980723c90e393
Author: Xiaobing <61...@users.noreply.github.com>
AuthorDate: Fri Nov 5 17:38:44 2021 -0700

    to handle datetime column consistently (#7645)
    
    SegmentColumnarIndexCreator uses local time zone while converting datetime w/o time zone to timestamp. This is inconsistent with DateTimeFormatSpec.fromFormatToMillis() which uses UTC by default. This PR reuses DateTimeFormatSpec to handle timecolumn in SegmentColumnarIndexCreator.
---
 .../creator/impl/SegmentColumnarIndexCreator.java  |   7 +-
 .../impl/SegmentColumnarIndexCreatorTest.java      |  57 +++++++++
 .../SegmentGenerationWithTimeColumnTest.java       | 137 +++++++++++++++------
 .../spi/creator/SegmentGeneratorConfig.java        |  29 ++---
 .../spi/creator/SegmentGeneratorConfigTest.java    |  15 ++-
 5 files changed, 181 insertions(+), 64 deletions(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index d2ac484..8315d9a 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -68,6 +68,7 @@ import org.apache.pinot.segment.spi.index.reader.H3IndexResolution;
 import org.apache.pinot.segment.spi.partition.PartitionFunction;
 import org.apache.pinot.spi.config.table.FieldConfig;
 import org.apache.pinot.spi.data.DateTimeFieldSpec;
+import org.apache.pinot.spi.data.DateTimeFormatSpec;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.data.FieldSpec.FieldType;
@@ -76,7 +77,6 @@ import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.utils.TimeUtils;
 import org.joda.time.DateTimeZone;
 import org.joda.time.Interval;
-import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -639,7 +639,10 @@ public class SegmentColumnarIndexCreator implements SegmentCreator {
 
             if (_config.getTimeColumnType() == SegmentGeneratorConfig.TimeColumnType.SIMPLE_DATE) {
               // For TimeColumnType.SIMPLE_DATE_FORMAT, convert time value into millis since epoch
-              DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern(_config.getSimpleDateFormat());
+              // Use DateTimeFormatter from DateTimeFormatSpec to handle default time zone consistently.
+              DateTimeFormatSpec formatSpec = _config.getDateTimeFormatSpec();
+              Preconditions.checkNotNull(formatSpec, "DateTimeFormatSpec must exist for SimpleDate");
+              DateTimeFormatter dateTimeFormatter = formatSpec.getDateTimeFormatter();
               startTime = dateTimeFormatter.parseMillis(startTimeStr);
               endTime = dateTimeFormatter.parseMillis(endTimeStr);
               timeUnit = TimeUnit.MILLISECONDS;
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreatorTest.java
index ef65b23..8b2c60c 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreatorTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreatorTest.java
@@ -18,12 +18,27 @@
  */
 package org.apache.pinot.segment.local.segment.creator.impl;
 
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import java.io.File;
 import java.io.IOException;
+import java.util.List;
 import org.apache.commons.configuration.PropertiesConfiguration;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.SegmentMetadata;
 import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.utils.ReadMode;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
@@ -119,6 +134,48 @@ public class SegmentColumnarIndexCreatorTest {
     Assert.assertFalse(configuration.containsKey(COLUMN_PROPERTY_KEY_PREFIX + "c"));
   }
 
+  @Test
+  public void testTimeColumnInMetadata()
+      throws Exception {
+    long withTZ =
+        getStartTimeInSegmentMetadata("1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd'T'HH:mm:ssZ", "2021-07-21T06:48:51Z");
+    long withoutTZ =
+        getStartTimeInSegmentMetadata("1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd'T'HH:mm:ss", "2021-07-21T06:48:51");
+    Assert.assertEquals(withTZ, 1626850131000L); // as UTC timestamp.
+    Assert.assertEquals(withTZ, withoutTZ);
+  }
+
+  private static long getStartTimeInSegmentMetadata(String testDateTimeFormat, String testDateTime)
+      throws Exception {
+    String timeColumn = "foo";
+    Schema schema = new Schema.SchemaBuilder()
+        .addDateTime(timeColumn, FieldSpec.DataType.STRING, testDateTimeFormat, "1:MILLISECONDS").build();
+    TableConfig tableConfig =
+        new TableConfigBuilder(TableType.OFFLINE).setTableName("test").setTimeColumnName(timeColumn).build();
+
+    String segmentName = "testSegment";
+    String indexDirPath = new File(TEMP_DIR, segmentName).getAbsolutePath();
+    SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
+    config.setOutDir(indexDirPath);
+    config.setSegmentName(segmentName);
+    try {
+      FileUtils.deleteQuietly(new File(indexDirPath));
+
+      GenericRow row = new GenericRow();
+      row.init(ImmutableMap.of(timeColumn, testDateTime));
+      List<GenericRow> rows = ImmutableList.of(row);
+
+      SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+      driver.init(config, new GenericRowRecordReader(rows));
+      driver.build();
+      IndexSegment indexSegment = ImmutableSegmentLoader.load(new File(indexDirPath, segmentName), ReadMode.heap);
+      SegmentMetadata md = indexSegment.getSegmentMetadata();
+      return md.getStartTime();
+    } finally {
+      FileUtils.deleteQuietly(new File(indexDirPath));
+    }
+  }
+
   @AfterClass
   public void tearDown()
       throws IOException {
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithTimeColumnTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithTimeColumnTest.java
index 884b77e..8bfc2d6 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithTimeColumnTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/SegmentGenerationWithTimeColumnTest.java
@@ -52,7 +52,8 @@ import org.testng.annotations.Test;
 public class SegmentGenerationWithTimeColumnTest {
   private static final String STRING_COL_NAME = "someString";
   private static final String TIME_COL_NAME = "date";
-  private static final String TIME_COL_FORMAT = "yyyyMMdd";
+  private static final String TIME_COL_FORMAT_NO_ZONE = "yyyyMMdd";
+  private static final String TIME_COL_FORMAT_WITH_ZONE = "yyyyMMddZ";
   private static final String SEGMENT_DIR_NAME =
       System.getProperty("java.io.tmpdir") + File.separator + "segmentGenTest";
   private static final String SEGMENT_NAME = "testSegment";
@@ -84,33 +85,81 @@ public class SegmentGenerationWithTimeColumnTest {
   }
 
   @Test
-  public void testSimpleDateSegmentGeneration()
+  public void testSimpleDateSegmentGenerationNoTimeZone()
       throws Exception {
-    Schema schema = createSchema(true);
-    File segmentDir = buildSegment(_tableConfig, schema, true, false);
+    // Require no time zone in time format and time values don't have time zone.
+    Schema schema = createSchema(true, TIME_COL_FORMAT_NO_ZONE);
+    File segmentDir = buildSegment(_tableConfig, schema, true, false, "");
     SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
-    Assert.assertEquals(metadata.getStartTime(), sdfToMillis(_minTime));
-    Assert.assertEquals(metadata.getEndTime(), sdfToMillis(_maxTime));
+    Assert.assertEquals(metadata.getStartTime(), sdfToMillisNoTimeZone(_minTime + ""));
+    Assert.assertEquals(metadata.getEndTime(), sdfToMillisNoTimeZone(_maxTime + ""));
+  }
+
+  @Test
+  public void testSimpleDateSegmentGenerationWithTimeZoneUTC()
+      throws Exception {
+    // Require time zone in time format and time values have 'Z' suffix, i.e. UTC time zone.
+    Schema schema = createSchema(true, TIME_COL_FORMAT_WITH_ZONE);
+    File segmentDir = buildSegment(_tableConfig, schema, true, false, "Z");
+    SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
+    Assert.assertEquals(metadata.getStartTime(), sdfToMillisWithTimeZone(_minTime + "Z"));
+    Assert.assertEquals(metadata.getEndTime(), sdfToMillisWithTimeZone(_maxTime + "Z"));
+  }
+
+  @Test
+  public void testSimpleDateSegmentGenerationWithTimeZoneEST()
+      throws Exception {
+    // Require time zone in time format and time values have '-05:00' suffix, i.e. EST time zone.
+    Schema schema = createSchema(true, TIME_COL_FORMAT_WITH_ZONE);
+    String timeZoneSuffix = "-05:00";
+    File segmentDir = buildSegment(_tableConfig, schema, true, false, timeZoneSuffix);
+    SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
+    Assert.assertEquals(metadata.getStartTime(), sdfToMillisWithTimeZone(_minTime + timeZoneSuffix));
+    Assert.assertEquals(metadata.getEndTime(), sdfToMillisWithTimeZone(_maxTime + timeZoneSuffix));
   }
 
   /**
    * Tests using DateTimeFieldSpec as time column
    */
   @Test
-  public void testSimpleDateSegmentGenerationNew()
+  public void testSimpleDateSegmentGenerationNewNoTimeZone()
       throws Exception {
-    Schema schema = createDateTimeFieldSpecSchema(true);
-    File segmentDir = buildSegment(_tableConfig, schema, true, false);
+    // Require no time zone in time format and time values don't have time zone.
+    Schema schema = createDateTimeFieldSpecSchema(true, TIME_COL_FORMAT_NO_ZONE);
+    File segmentDir = buildSegment(_tableConfig, schema, true, false, "");
     SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
-    Assert.assertEquals(metadata.getStartTime(), sdfToMillis(_minTime));
-    Assert.assertEquals(metadata.getEndTime(), sdfToMillis(_maxTime));
+    Assert.assertEquals(metadata.getStartTime(), sdfToMillisNoTimeZone(_minTime + ""));
+    Assert.assertEquals(metadata.getEndTime(), sdfToMillisNoTimeZone(_maxTime + ""));
+  }
+
+  @Test
+  public void testSimpleDateSegmentGenerationNewWithTimeZoneUTC()
+      throws Exception {
+    // Require time zone in time format and time values have 'Z' suffix, i.e. UTC time zone.
+    Schema schema = createDateTimeFieldSpecSchema(true, TIME_COL_FORMAT_WITH_ZONE);
+    File segmentDir = buildSegment(_tableConfig, schema, true, false, "Z");
+    SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
+    Assert.assertEquals(metadata.getStartTime(), sdfToMillisWithTimeZone(_minTime + "Z"));
+    Assert.assertEquals(metadata.getEndTime(), sdfToMillisWithTimeZone(_maxTime + "Z"));
+  }
+
+  @Test
+  public void testSimpleDateSegmentGenerationNewWithTimeZoneEST()
+      throws Exception {
+    // Require time zone in time format and time values have '-05:00' suffix, i.e. EST time zone.
+    Schema schema = createDateTimeFieldSpecSchema(true, TIME_COL_FORMAT_WITH_ZONE);
+    String timeZoneSuffix = "-05:00";
+    File segmentDir = buildSegment(_tableConfig, schema, true, false, timeZoneSuffix);
+    SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
+    Assert.assertEquals(metadata.getStartTime(), sdfToMillisWithTimeZone(_minTime + timeZoneSuffix));
+    Assert.assertEquals(metadata.getEndTime(), sdfToMillisWithTimeZone(_maxTime + timeZoneSuffix));
   }
 
   @Test
   public void testEpochDateSegmentGeneration()
       throws Exception {
-    Schema schema = createSchema(false);
-    File segmentDir = buildSegment(_tableConfig, schema, false, false);
+    Schema schema = createSchema(false, "");
+    File segmentDir = buildSegment(_tableConfig, schema, false, false, "");
     SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
     Assert.assertEquals(metadata.getStartTime(), _minTime);
     Assert.assertEquals(metadata.getEndTime(), _maxTime);
@@ -120,7 +169,7 @@ public class SegmentGenerationWithTimeColumnTest {
   public void testSimpleDateSegmentGenerationNewWithDegenerateSeed()
       throws Exception {
     _random.setSeed(255672780506968L);
-    testSimpleDateSegmentGenerationNew();
+    testSimpleDateSegmentGenerationNewNoTimeZone();
   }
 
   @Test
@@ -136,8 +185,8 @@ public class SegmentGenerationWithTimeColumnTest {
   @Test
   public void testEpochDateSegmentGenerationNew()
       throws Exception {
-    Schema schema = createDateTimeFieldSpecSchema(false);
-    File segmentDir = buildSegment(_tableConfig, schema, false, false);
+    Schema schema = createDateTimeFieldSpecSchema(false, "");
+    File segmentDir = buildSegment(_tableConfig, schema, false, false, "");
     SegmentMetadataImpl metadata = new SegmentMetadataImpl(segmentDir);
     Assert.assertEquals(metadata.getStartTime(), _minTime);
     Assert.assertEquals(metadata.getEndTime(), _maxTime);
@@ -146,8 +195,8 @@ public class SegmentGenerationWithTimeColumnTest {
   @Test(expectedExceptions = IllegalStateException.class)
   public void testSegmentGenerationWithInvalidTime()
       throws Exception {
-    Schema schema = createSchema(false);
-    buildSegment(_tableConfig, schema, false, true);
+    Schema schema = createSchema(false, "");
+    buildSegment(_tableConfig, schema, false, true, "");
   }
 
   /**
@@ -156,28 +205,28 @@ public class SegmentGenerationWithTimeColumnTest {
   @Test(expectedExceptions = IllegalStateException.class)
   public void testSegmentGenerationWithInvalidTimeNew()
       throws Exception {
-    Schema schema = createDateTimeFieldSpecSchema(false);
-    buildSegment(_tableConfig, schema, false, true);
+    Schema schema = createDateTimeFieldSpecSchema(false, "");
+    buildSegment(_tableConfig, schema, false, true, "");
   }
 
-  private Schema createSchema(boolean isSimpleDate) {
+  private Schema createSchema(boolean isSimpleDate, String timeColFormat) {
     Schema.SchemaBuilder builder =
         new Schema.SchemaBuilder().addSingleValueDimension(STRING_COL_NAME, FieldSpec.DataType.STRING);
     if (isSimpleDate) {
-      builder.addTime(new TimeGranularitySpec(FieldSpec.DataType.INT, TimeUnit.DAYS,
-          TimeGranularitySpec.TimeFormat.SIMPLE_DATE_FORMAT.toString() + ":" + TIME_COL_FORMAT, TIME_COL_NAME), null);
+      builder.addTime(new TimeGranularitySpec(FieldSpec.DataType.STRING, TimeUnit.DAYS,
+          TimeGranularitySpec.TimeFormat.SIMPLE_DATE_FORMAT.toString() + ":" + timeColFormat, TIME_COL_NAME), null);
     } else {
       builder.addTime(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, TIME_COL_NAME), null);
     }
     return builder.build();
   }
 
-  private Schema createDateTimeFieldSpecSchema(boolean isSimpleDate) {
+  private Schema createDateTimeFieldSpecSchema(boolean isSimpleDate, String timeColFormat) {
     Schema.SchemaBuilder builder =
         new Schema.SchemaBuilder().addSingleValueDimension(STRING_COL_NAME, FieldSpec.DataType.STRING);
     if (isSimpleDate) {
-      builder
-          .addDateTime(TIME_COL_NAME, FieldSpec.DataType.INT, "1:DAYS:SIMPLE_DATE_FORMAT:" + TIME_COL_FORMAT, "1:DAYS");
+      builder.addDateTime(TIME_COL_NAME, FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:" + timeColFormat,
+          "1:DAYS");
     } else {
       builder.addDateTime(TIME_COL_NAME, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS");
     }
@@ -190,7 +239,7 @@ public class SegmentGenerationWithTimeColumnTest {
   }
 
   private File buildSegment(final TableConfig tableConfig, final Schema schema, final boolean isSimpleDate,
-      final boolean isInvalidDate)
+      final boolean isInvalidDate, String timeZoneSuffix)
       throws Exception {
     SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
     config.setRawIndexCreationColumns(schema.getDimensionNames());
@@ -204,7 +253,7 @@ public class SegmentGenerationWithTimeColumnTest {
       for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
         Object value;
 
-        value = getRandomValueForColumn(fieldSpec, isSimpleDate, isInvalidDate);
+        value = getRandomValueForColumn(fieldSpec, isSimpleDate, isInvalidDate, timeZoneSuffix);
         map.put(fieldSpec.getName(), value);
       }
 
@@ -220,9 +269,10 @@ public class SegmentGenerationWithTimeColumnTest {
     return driver.getOutputDirectory();
   }
 
-  private Object getRandomValueForColumn(FieldSpec fieldSpec, boolean isSimpleDate, boolean isInvalidDate) {
+  private Object getRandomValueForColumn(FieldSpec fieldSpec, boolean isSimpleDate, boolean isInvalidDate,
+      String timeZoneSuffix) {
     if (fieldSpec.getName().equals(TIME_COL_NAME)) {
-      return getRandomValueForTimeColumn(isSimpleDate, isInvalidDate);
+      return getRandomValueForTimeColumn(isSimpleDate, isInvalidDate, timeZoneSuffix);
     }
     return RawIndexCreatorTest.getRandomValue(_random, fieldSpec.getDataType());
   }
@@ -240,12 +290,8 @@ public class SegmentGenerationWithTimeColumnTest {
     Assert.assertEquals(day, 1);
   }
 
-  private Object getRandomValueForTimeColumn(boolean isSimpleDate, boolean isInvalidDate) {
-    // avoid testing within a day after the start of the epoch because timezones aren't (and can't)
-    // be handled properly
-    long oneDayInMillis = 24 * 60 * 60 * 1000;
-    long randomMs = _validMinTime + oneDayInMillis
-        + (long) (_random.nextDouble() * (_startTime - _validMinTime - oneDayInMillis));
+  private Object getRandomValueForTimeColumn(boolean isSimpleDate, boolean isInvalidDate, String timeZoneSuffix) {
+    long randomMs = _validMinTime + (long) (_random.nextDouble() * (_startTime - _validMinTime));
     Preconditions.checkArgument(TimeUtils.timeValueInValidRange(randomMs), "Value " + randomMs + " out of range");
     long dateColVal = randomMs;
     Object result;
@@ -271,12 +317,25 @@ public class SegmentGenerationWithTimeColumnTest {
     if (dateColVal > _maxTime) {
       _maxTime = dateColVal;
     }
+    if (isSimpleDate) {
+      return result + timeZoneSuffix;
+    }
     return result;
   }
 
-  private long sdfToMillis(long value) {
-    DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(TIME_COL_FORMAT);
-    DateTime dateTime = DateTime.parse(Long.toString(value), sdfFormatter);
+  private long sdfToMillisNoTimeZone(String value) {
+    // Set UTC explicitly otherwise the local time zone is used, because
+    // datetime values don't have time zone info in them.
+    DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(TIME_COL_FORMAT_NO_ZONE).withZoneUTC();
+    DateTime dateTime = DateTime.parse(value, sdfFormatter);
+    return dateTime.getMillis();
+  }
+
+  private long sdfToMillisWithTimeZone(String value) {
+    // The sdfFormatter would use the time zone info in the datetime values,
+    // so no need to specify the time zone explicitly.
+    DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(TIME_COL_FORMAT_WITH_ZONE);
+    DateTime dateTime = DateTime.parse(value, sdfFormatter);
     return dateTime.getMillis();
   }
 }
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
index 86fa490..d1b16d7 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -51,7 +51,6 @@ import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.FileFormat;
 import org.apache.pinot.spi.data.readers.RecordReaderConfig;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
-import org.joda.time.format.DateTimeFormat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -100,7 +99,7 @@ public class SegmentGeneratorConfig implements Serializable {
   private SegmentPartitionConfig _segmentPartitionConfig = null;
   private int _sequenceId = -1;
   private TimeColumnType _timeColumnType = TimeColumnType.EPOCH;
-  private String _simpleDateFormat = null;
+  private DateTimeFormatSpec _dateTimeFormatSpec = null;
   // Use on-heap or off-heap memory to generate index (currently only affect inverted index and star-tree v2)
   private boolean _onHeap = false;
   private boolean _skipTimeValueCheck = false;
@@ -211,12 +210,7 @@ public class SegmentGeneratorConfig implements Serializable {
       DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(timeColumnName);
       if (dateTimeFieldSpec != null) {
         setTimeColumnName(dateTimeFieldSpec.getName());
-        DateTimeFormatSpec formatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
-        if (formatSpec.getTimeFormat() == DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT) {
-          setSimpleDateFormat(formatSpec.getSDFPattern());
-        } else {
-          setSegmentTimeUnit(formatSpec.getColumnUnit());
-        }
+        setDateTimeFormatSpec(new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()));
       }
     }
   }
@@ -286,18 +280,19 @@ public class SegmentGeneratorConfig implements Serializable {
     _customProperties.putAll(properties);
   }
 
-  public void setSimpleDateFormat(String simpleDateFormat) {
-    _timeColumnType = TimeColumnType.SIMPLE_DATE;
-    try {
-      DateTimeFormat.forPattern(simpleDateFormat);
-    } catch (Exception e) {
-      throw new RuntimeException("Illegal simple date format specification", e);
+  public void setDateTimeFormatSpec(DateTimeFormatSpec formatSpec) {
+    _dateTimeFormatSpec = formatSpec;
+    if (formatSpec.getTimeFormat() == DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT) {
+      // timeUnit is only needed by EPOCH time format.
+      _timeColumnType = TimeColumnType.SIMPLE_DATE;
+    } else {
+      _segmentTimeUnit = formatSpec.getColumnUnit();
+      _timeColumnType = TimeColumnType.EPOCH;
     }
-    _simpleDateFormat = simpleDateFormat;
   }
 
-  public String getSimpleDateFormat() {
-    return _simpleDateFormat;
+  public DateTimeFormatSpec getDateTimeFormatSpec() {
+    return _dateTimeFormatSpec;
   }
 
   public TimeColumnType getTimeColumnType() {
diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfigTest.java b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfigTest.java
index d391a7b..51ddc9e 100644
--- a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfigTest.java
+++ b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfigTest.java
@@ -28,6 +28,7 @@ import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertNull;
 
 
@@ -44,14 +45,14 @@ public class SegmentGeneratorConfigTest {
     assertEquals(segmentGeneratorConfig.getTimeColumnName(), "daysSinceEpoch");
     assertEquals(segmentGeneratorConfig.getTimeColumnType(), SegmentGeneratorConfig.TimeColumnType.EPOCH);
     assertEquals(segmentGeneratorConfig.getSegmentTimeUnit(), TimeUnit.DAYS);
-    assertNull(segmentGeneratorConfig.getSimpleDateFormat());
+    assertNotNull(segmentGeneratorConfig.getDateTimeFormatSpec());
 
     // MUST provide valid tableConfig with time column if time details are wanted
     tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName("test").build();
     segmentGeneratorConfig = new SegmentGeneratorConfig(tableConfig, schema);
     assertNull(segmentGeneratorConfig.getTimeColumnName());
     assertNull(segmentGeneratorConfig.getSegmentTimeUnit());
-    assertNull(segmentGeneratorConfig.getSimpleDateFormat());
+    assertNull(segmentGeneratorConfig.getDateTimeFormatSpec());
 
     schema = new Schema.SchemaBuilder().addDateTime("daysSinceEpoch", FieldSpec.DataType.INT, "1:DAYS:EPOCH", "1:DAYS")
         .build();
@@ -61,7 +62,7 @@ public class SegmentGeneratorConfigTest {
     assertEquals(segmentGeneratorConfig.getTimeColumnName(), "daysSinceEpoch");
     assertEquals(segmentGeneratorConfig.getTimeColumnType(), SegmentGeneratorConfig.TimeColumnType.EPOCH);
     assertEquals(segmentGeneratorConfig.getSegmentTimeUnit(), TimeUnit.DAYS);
-    assertNull(segmentGeneratorConfig.getSimpleDateFormat());
+    assertNotNull(segmentGeneratorConfig.getDateTimeFormatSpec());
   }
 
   @Test
@@ -75,14 +76,15 @@ public class SegmentGeneratorConfigTest {
     assertEquals(segmentGeneratorConfig.getTimeColumnName(), "Date");
     assertEquals(segmentGeneratorConfig.getTimeColumnType(), SegmentGeneratorConfig.TimeColumnType.SIMPLE_DATE);
     assertNull(segmentGeneratorConfig.getSegmentTimeUnit());
-    assertEquals(segmentGeneratorConfig.getSimpleDateFormat(), "yyyyMMdd");
+    assertNotNull(segmentGeneratorConfig.getDateTimeFormatSpec());
+    assertEquals(segmentGeneratorConfig.getDateTimeFormatSpec().getSDFPattern(), "yyyyMMdd");
 
     // MUST provide valid tableConfig with time column if time details are wanted
     tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName("test").build();
     segmentGeneratorConfig = new SegmentGeneratorConfig(tableConfig, schema);
     assertNull(segmentGeneratorConfig.getTimeColumnName());
     assertNull(segmentGeneratorConfig.getSegmentTimeUnit());
-    assertNull(segmentGeneratorConfig.getSimpleDateFormat());
+    assertNull(segmentGeneratorConfig.getDateTimeFormatSpec());
 
     schema = new Schema.SchemaBuilder()
         .addDateTime("Date", FieldSpec.DataType.STRING, "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", "1:DAYS").build();
@@ -91,6 +93,7 @@ public class SegmentGeneratorConfigTest {
     assertEquals(segmentGeneratorConfig.getTimeColumnName(), "Date");
     assertEquals(segmentGeneratorConfig.getTimeColumnType(), SegmentGeneratorConfig.TimeColumnType.SIMPLE_DATE);
     assertNull(segmentGeneratorConfig.getSegmentTimeUnit());
-    assertEquals(segmentGeneratorConfig.getSimpleDateFormat(), "yyyyMMdd");
+    assertNotNull(segmentGeneratorConfig.getDateTimeFormatSpec());
+    assertEquals(segmentGeneratorConfig.getDateTimeFormatSpec().getSDFPattern(), "yyyyMMdd");
   }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org