You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ji...@apache.org on 2019/04/10 20:47:26 UTC

[incubator-pinot] branch master updated: [TE] onboard dataset with epoch time stamps (#4094)

This is an automated email from the ASF dual-hosted git repository.

jihao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 5be8431  [TE] onboard dataset with epoch time stamps (#4094)
5be8431 is described below

commit 5be8431d6a4984b7efa3a04cd5d9076ed48ea9d8
Author: Jihao Zhang <ji...@linkedin.com>
AuthorDate: Wed Apr 10 13:47:21 2019 -0700

    [TE] onboard dataset with epoch time stamps (#4094)
    
    For datasets with epoch timestamps, the real dataset granularity and the time unit granularity are usually different. By default, TE onboards such a dataset as a five-minute granularity metric. This can be configured in the dataset configs.
---
 .../auto/onboard/AutoOnboardPinotMetadataSource.java     |  3 ---
 .../pinot/thirdeye/auto/onboard/ConfigGenerator.java     | 16 ++++++++++------
 .../dashboard/resources/AutoOnboardResource.java         |  5 +++++
 .../detection/wrapper/AnomalyDetectorWrapper.java        | 12 ------------
 .../auto/onboard/AutoOnboardPinotMetricsServiceTest.java | 10 ++++++----
 5 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetadataSource.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetadataSource.java
index ca64bfc..09b48fc 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetadataSource.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetadataSource.java
@@ -337,9 +337,6 @@ public class AutoOnboardPinotMetadataSource extends AutoOnboard {
 
   private void checkTimeFieldChanges(DatasetConfigDTO datasetConfig, Schema schema) {
     TimeGranularitySpec timeSpec = schema.getTimeFieldSpec().getOutgoingGranularitySpec();
-    if (timeSpec.getTimeType().equals(TimeUnit.MILLISECONDS)){
-      timeSpec.setTimeType(TimeUnit.MINUTES);
-    }
     if (!datasetConfig.getTimeColumn().equals(timeSpec.getName())
         || !datasetConfig.getTimeFormat().equals(timeSpec.getTimeFormat())
         || datasetConfig.bucketTimeGranularity().getUnit() != timeSpec.getTimeType()
diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/ConfigGenerator.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/ConfigGenerator.java
index ab89e32..cce58e2 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/ConfigGenerator.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/auto/onboard/ConfigGenerator.java
@@ -19,21 +19,21 @@
 
 package org.apache.pinot.thirdeye.auto.onboard;
 
-import org.apache.pinot.thirdeye.datalayer.pojo.MetricConfigBean;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
-
 import org.apache.pinot.common.data.MetricFieldSpec;
 import org.apache.pinot.common.data.Schema;
 import org.apache.pinot.common.data.TimeGranularitySpec;
 import org.apache.pinot.common.data.TimeGranularitySpec.TimeFormat;
 import org.apache.pinot.thirdeye.common.metric.MetricType;
 import org.apache.pinot.thirdeye.common.time.TimeGranularity;
+import org.apache.pinot.thirdeye.common.time.TimeSpec;
 import org.apache.pinot.thirdeye.datalayer.dto.DatasetConfigDTO;
 import org.apache.pinot.thirdeye.datalayer.dto.MetricConfigDTO;
 import org.apache.pinot.thirdeye.datalayer.pojo.DatasetConfigBean;
+import org.apache.pinot.thirdeye.datalayer.pojo.MetricConfigBean;
 import org.apache.pinot.thirdeye.datasource.pinot.PinotThirdEyeDataSource;
 import org.apache.pinot.thirdeye.util.ThirdEyeUtils;
 
@@ -48,18 +48,22 @@ public class ConfigGenerator {
     datasetConfigDTO.setTimeUnit(timeSpec.getTimeType());
     datasetConfigDTO.setTimeFormat(timeSpec.getTimeFormat());
     datasetConfigDTO.setExpectedDelay(getExpectedDelayFromTimeunit(timeSpec.getTimeType()));
-    if (timeSpec.getTimeFormat().startsWith(TimeFormat.SIMPLE_DATE_FORMAT.toString())) {
+    if (timeSpec.getTimeFormat().startsWith(TimeFormat.SIMPLE_DATE_FORMAT.toString()) || timeSpec.getTimeFormat().equals(TimeSpec.SINCE_EPOCH_FORMAT)) {
       datasetConfigDTO.setTimezone(PDT_TIMEZONE);
     }
+    // set the data granularity of epoch timestamp dataset to minute-level
+    if (datasetConfigDTO.getTimeFormat().equals(TimeSpec.SINCE_EPOCH_FORMAT) && datasetConfigDTO.getTimeUnit()
+        .equals(TimeUnit.MILLISECONDS) && (datasetConfigDTO.getNonAdditiveBucketSize() == null
+        || datasetConfigDTO.getNonAdditiveBucketUnit() == null)) {
+      datasetConfigDTO.setNonAdditiveBucketUnit(TimeUnit.MINUTES);
+      datasetConfigDTO.setNonAdditiveBucketSize(5);
+    }
   }
 
   public static DatasetConfigDTO generateDatasetConfig(String dataset, Schema schema,
       Map<String, String> customConfigs) {
     List<String> dimensions = schema.getDimensionNames();
     TimeGranularitySpec timeSpec = schema.getTimeFieldSpec().getOutgoingGranularitySpec();
-    if (timeSpec.getTimeType().equals(TimeUnit.MILLISECONDS)){
-      timeSpec.setTimeType(TimeUnit.MINUTES);
-    }
     // Create DatasetConfig
     DatasetConfigDTO datasetConfigDTO = new DatasetConfigDTO();
     datasetConfigDTO.setDataset(dataset);
diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/AutoOnboardResource.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/AutoOnboardResource.java
index f073ece..494aa08 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/AutoOnboardResource.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/dashboard/resources/AutoOnboardResource.java
@@ -19,6 +19,9 @@
 
 package org.apache.pinot.thirdeye.dashboard.resources;
 
+import com.wordnik.swagger.annotations.Api;
+import com.wordnik.swagger.annotations.ApiOperation;
+import org.apache.pinot.thirdeye.api.Constants;
 import org.apache.pinot.thirdeye.auto.onboard.AutoOnboard;
 import org.apache.pinot.thirdeye.auto.onboard.AutoOnboardUtility;
 import org.apache.pinot.thirdeye.common.ThirdEyeConfiguration;
@@ -49,6 +52,7 @@ import org.slf4j.LoggerFactory;
  */
 @Path(value = "/autoOnboard")
 @Produces(MediaType.APPLICATION_JSON)
+@Api(tags = {Constants.DASHBOARD_TAG})
 public class AutoOnboardResource {
 
   private Map<String, List<AutoOnboard>> dataSourceToOnboardMap;
@@ -59,6 +63,7 @@ public class AutoOnboardResource {
 
   @POST
   @Path("/runAdhoc/{datasource}")
+  @ApiOperation("run auto onboard for a data source")
   public Response runAdhocOnboard(@PathParam("datasource") String datasource) {
     if (!dataSourceToOnboardMap.containsKey(datasource)) {
       return Response.status(Status.INTERNAL_SERVER_ERROR)
diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/detection/wrapper/AnomalyDetectorWrapper.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/detection/wrapper/AnomalyDetectorWrapper.java
index 99ba6dd..bfd8365 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/detection/wrapper/AnomalyDetectorWrapper.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/detection/wrapper/AnomalyDetectorWrapper.java
@@ -340,18 +340,6 @@ public class AnomalyDetectorWrapper extends DetectionPipeline {
     return bucketSizePeriod;
   }
 
-  public static TimeGranularity toTimeGranularity(Period period) {
-    if (period.getDays() > 0) {
-      return new TimeGranularity(period.getDays(), TimeUnit.DAYS);
-    } else if (period.getHours() > 0) {
-      return new TimeGranularity(period.getHours(), TimeUnit.HOURS);
-    } else if (period.getMinutes() > 0)  {
-      return new TimeGranularity(period.getMinutes(), TimeUnit.MINUTES);
-    } else {
-      return new TimeGranularity(period.getMillis(), TimeUnit.MILLISECONDS);
-    }
-  }
-
   /**
    * Speed up minute level detection.
    *
diff --git a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetricsServiceTest.java b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetricsServiceTest.java
index e5735ce..3324532 100644
--- a/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetricsServiceTest.java
+++ b/thirdeye/thirdeye-pinot/src/test/java/org/apache/pinot/thirdeye/auto/onboard/AutoOnboardPinotMetricsServiceTest.java
@@ -82,7 +82,7 @@ public class AutoOnboardPinotMetricsServiceTest {
     Assert.assertEquals(datasetConfig.bucketTimeGranularity().getUnit(), timeGranularitySpec.getTimeType());
     Assert.assertEquals(datasetConfig.bucketTimeGranularity().getSize(), timeGranularitySpec.getTimeUnitSize());
     Assert.assertEquals(datasetConfig.getTimeFormat(), timeGranularitySpec.getTimeFormat());
-    Assert.assertEquals(datasetConfig.getTimezone(), "UTC");
+    Assert.assertEquals(datasetConfig.getTimezone(), "US/Pacific");
     Assert.assertEquals(datasetConfig.getExpectedDelay().getUnit(), TimeUnit.HOURS);
 
     List<MetricConfigDTO> metricConfigs = metricConfigDAO.findByDataset(dataset);
@@ -149,10 +149,12 @@ public class AutoOnboardPinotMetricsServiceTest {
     Assert.assertEquals(datasetConfigDAO.findAll().size(), 1);
     datasetConfig = datasetConfigDAO.findByDataset(dataset);
     TimeGranularitySpec timeGranularitySpec = schema.getTimeFieldSpec().getOutgoingGranularitySpec();
-    Assert.assertEquals(datasetConfig.bucketTimeGranularity().getUnit(), timeGranularitySpec.getTimeType());
-    Assert.assertEquals(datasetConfig.bucketTimeGranularity().getSize(), timeGranularitySpec.getTimeUnitSize());
+    Assert.assertEquals(datasetConfig.bucketTimeGranularity().getUnit(), TimeUnit.MINUTES);
+    Assert.assertEquals(datasetConfig.bucketTimeGranularity().getSize(), 5);
+    Assert.assertEquals(datasetConfig.getTimeUnit(), timeGranularitySpec.getTimeType());
+    Assert.assertEquals(datasetConfig.getTimeDuration().intValue(), timeGranularitySpec.getTimeUnitSize());
     Assert.assertEquals(datasetConfig.getTimeFormat(), timeGranularitySpec.getTimeFormat());
-    Assert.assertEquals(datasetConfig.getTimezone(), "UTC");
+    Assert.assertEquals(datasetConfig.getTimezone(), "US/Pacific");
     Assert.assertEquals(datasetConfig.getExpectedDelay().getUnit(), TimeUnit.HOURS);
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org