Posted to commits@pinot.apache.org by ap...@apache.org on 2019/01/11 23:39:21 UTC
[incubator-pinot] branch master updated: [TE] meta - rename base package to match new apache pinot convention (#3678)
This is an automated email from the ASF dual-hosted git repository.
apucher pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 39130c3 [TE] meta - rename base package to match new apache pinot convention (#3678)
39130c3 is described below
commit 39130c3021ce892c42d202b4c79f760bb19373fa
Author: Alexander Pucher <ap...@linkedin.com>
AuthorDate: Fri Jan 11 15:39:17 2019 -0800
[TE] meta - rename base package to match new apache pinot convention (#3678)
This PR updates the base package name in ThirdEye from "com.linkedin" to "org.apache.pinot" to match the new Apache Pinot convention. It also makes smaller supporting changes to dependencies to maintain compatibility.
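In practice, the rename moves each source file under the new directory tree and rewrites its package declaration, leaving the implementation untouched. A minimal before/after sketch for one file taken from the diffstat below (the class body is elided here; only the declaration changes):

    // Before: src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java
    package com.linkedin.thirdeye.hadoop.config;

    public class TimeSpec {
        // implementation unchanged by the rename
    }

    // After: src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeSpec.java
    package org.apache.pinot.thirdeye.hadoop.config;

    public class TimeSpec {
        // implementation unchanged by the rename
    }

Callers update their imports accordingly, e.g. "import org.apache.pinot.thirdeye.hadoop.config.TimeSpec;" in place of the old com.linkedin path, which is why the diffstat pairs a deletion and an addition of equal size for each file.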
---
pom.xml | 1 +
thirdeye/pom.xml | 17 +-
.../app/pods/auto-onboard/controller.js | 6 +-
thirdeye/thirdeye-frontend/pom.xml | 4 +-
thirdeye/thirdeye-hadoop/pom.xml | 8 +-
.../com/linkedin/thirdeye/hadoop/ThirdEyeJob.java | 455 ---
.../thirdeye/hadoop/ThirdEyeJobProperties.java | 42 -
.../hadoop/aggregation/AggregationPhaseConfig.java | 109 -
.../aggregation/AggregationPhaseConstants.java | 37 -
.../hadoop/aggregation/AggregationPhaseJob.java | 360 --
.../aggregation/AggregationPhaseMapOutputKey.java | 111 -
.../AggregationPhaseMapOutputValue.java | 92 -
.../hadoop/backfill/BackfillControllerAPIs.java | 251 --
.../hadoop/backfill/BackfillPhaseConstants.java | 40 -
.../thirdeye/hadoop/backfill/BackfillPhaseJob.java | 203 -
.../hadoop/backfill/BackfillPhaseMapJob.java | 164 -
.../thirdeye/hadoop/config/DimensionSpec.java | 58 -
.../thirdeye/hadoop/config/DimensionType.java | 205 -
.../thirdeye/hadoop/config/MetricSpec.java | 57 -
.../thirdeye/hadoop/config/MetricType.java | 175 -
.../linkedin/thirdeye/hadoop/config/SplitSpec.java | 48 -
.../thirdeye/hadoop/config/ThirdEyeConfig.java | 479 ---
.../hadoop/config/ThirdEyeConfigProperties.java | 105 -
.../thirdeye/hadoop/config/ThirdEyeConstants.java | 40 -
.../thirdeye/hadoop/config/TimeGranularity.java | 102 -
.../linkedin/thirdeye/hadoop/config/TimeSpec.java | 60 -
.../hadoop/config/TopKDimensionToMetricsSpec.java | 60 -
.../thirdeye/hadoop/config/TopkWhitelistSpec.java | 77 -
.../DerivedColumnTransformationPhaseConfig.java | 135 -
.../DerivedColumnTransformationPhaseConstants.java | 38 -
.../DerivedColumnTransformationPhaseJob.java | 403 --
.../thirdeye/hadoop/join/DefaultJoinConfigUDF.java | 31 -
.../hadoop/join/DefaultJoinKeyExtractor.java | 54 -
.../hadoop/join/DelegatingAvroKeyInputFormat.java | 77 -
.../thirdeye/hadoop/join/GenericJoinUDF.java | 85 -
.../thirdeye/hadoop/join/GenericJoinUDFConfig.java | 100 -
.../thirdeye/hadoop/join/JoinConfigUDF.java | 31 -
.../thirdeye/hadoop/join/JoinKeyExtractor.java | 32 -
.../thirdeye/hadoop/join/JoinPhaseConstants.java | 42 -
.../thirdeye/hadoop/join/JoinPhaseJob.java | 394 --
.../com/linkedin/thirdeye/hadoop/join/JoinUDF.java | 39 -
.../thirdeye/hadoop/join/MapOutputKey.java | 21 -
.../thirdeye/hadoop/join/MapOutputValue.java | 103 -
.../hadoop/push/DefaultSegmentPushUDF.java | 33 -
.../push/DeleteOverlappingSegmentsInPinot.java | 145 -
.../hadoop/push/SegmentPushControllerAPIs.java | 225 --
.../thirdeye/hadoop/push/SegmentPushPhase.java | 178 -
.../hadoop/push/SegmentPushPhaseConstants.java | 40 -
.../thirdeye/hadoop/push/SegmentPushUDF.java | 25 -
.../creation/SegmentCreationPhaseConstants.java | 41 -
.../segment/creation/SegmentCreationPhaseJob.java | 223 --
.../creation/SegmentCreationPhaseMapReduceJob.java | 320 --
.../hadoop/topk/DimensionValueMetricPair.java | 60 -
.../thirdeye/hadoop/topk/TopKDimensionValues.java | 75 -
.../thirdeye/hadoop/topk/TopKPhaseConfig.java | 159 -
.../thirdeye/hadoop/topk/TopKPhaseConstants.java | 36 -
.../thirdeye/hadoop/topk/TopKPhaseJob.java | 463 ---
.../hadoop/topk/TopKPhaseMapOutputKey.java | 112 -
.../hadoop/topk/TopKPhaseMapOutputValue.java | 93 -
.../transform/DefaultTransformConfigUDF.java | 31 -
.../hadoop/transform/DefaultTransformUDF.java | 44 -
.../transform/DelegatingAvroKeyInputFormat.java | 77 -
.../hadoop/transform/TransformConfigUDF.java | 31 -
.../hadoop/transform/TransformPhaseJob.java | 289 --
.../transform/TransformPhaseJobConstants.java | 39 -
.../thirdeye/hadoop/transform/TransformUDF.java | 38 -
.../hadoop/util/ThirdeyeAggregateMetricUtils.java | 58 -
.../thirdeye/hadoop/util/ThirdeyeAvroUtils.java | 267 --
.../hadoop/util/ThirdeyePinotSchemaUtils.java | 127 -
.../thirdeye/hadoop/wait/DefaultWaitUDF.java | 44 -
.../thirdeye/hadoop/wait/WaitPhaseJob.java | 68 -
.../hadoop/wait/WaitPhaseJobConstants.java | 34 -
.../com/linkedin/thirdeye/hadoop/wait/WaitUDF.java | 39 -
.../apache/pinot/thirdeye/hadoop/ThirdEyeJob.java | 455 +++
.../thirdeye/hadoop/ThirdEyeJobProperties.java | 42 +
.../hadoop/aggregation/AggregationPhaseConfig.java | 109 +
.../aggregation/AggregationPhaseConstants.java | 37 +
.../hadoop/aggregation/AggregationPhaseJob.java | 360 ++
.../aggregation/AggregationPhaseMapOutputKey.java | 111 +
.../AggregationPhaseMapOutputValue.java | 92 +
.../hadoop/backfill/BackfillControllerAPIs.java | 251 ++
.../hadoop/backfill/BackfillPhaseConstants.java | 40 +
.../thirdeye/hadoop/backfill/BackfillPhaseJob.java | 203 +
.../hadoop/backfill/BackfillPhaseMapJob.java | 164 +
.../thirdeye/hadoop/config/DimensionSpec.java | 58 +
.../thirdeye/hadoop/config/DimensionType.java | 205 +
.../pinot/thirdeye/hadoop/config/MetricSpec.java | 57 +
.../pinot/thirdeye/hadoop/config/MetricType.java | 175 +
.../pinot/thirdeye/hadoop/config/SplitSpec.java | 48 +
.../thirdeye/hadoop/config/ThirdEyeConfig.java | 479 +++
.../hadoop/config/ThirdEyeConfigProperties.java | 105 +
.../thirdeye/hadoop/config/ThirdEyeConstants.java | 40 +
.../thirdeye/hadoop/config/TimeGranularity.java | 102 +
.../pinot/thirdeye/hadoop/config/TimeSpec.java | 60 +
.../hadoop/config/TopKDimensionToMetricsSpec.java | 60 +
.../thirdeye/hadoop/config/TopkWhitelistSpec.java | 77 +
.../DerivedColumnTransformationPhaseConfig.java | 135 +
.../DerivedColumnTransformationPhaseConstants.java | 38 +
.../DerivedColumnTransformationPhaseJob.java | 403 ++
.../thirdeye/hadoop/join/DefaultJoinConfigUDF.java | 31 +
.../hadoop/join/DefaultJoinKeyExtractor.java | 54 +
.../hadoop/join/DelegatingAvroKeyInputFormat.java | 77 +
.../pinot/thirdeye/hadoop/join/GenericJoinUDF.java | 85 +
.../thirdeye/hadoop/join/GenericJoinUDFConfig.java | 100 +
.../pinot/thirdeye/hadoop/join/JoinConfigUDF.java | 31 +
.../thirdeye/hadoop/join/JoinKeyExtractor.java | 32 +
.../thirdeye/hadoop/join/JoinPhaseConstants.java | 42 +
.../pinot/thirdeye/hadoop/join/JoinPhaseJob.java | 394 ++
.../apache/pinot/thirdeye/hadoop/join/JoinUDF.java | 39 +
.../pinot/thirdeye/hadoop/join/MapOutputKey.java | 21 +
.../pinot/thirdeye/hadoop/join/MapOutputValue.java | 103 +
.../hadoop/push/DefaultSegmentPushUDF.java | 33 +
.../push/DeleteOverlappingSegmentsInPinot.java | 145 +
.../hadoop/push/SegmentPushControllerAPIs.java | 225 ++
.../thirdeye/hadoop/push/SegmentPushPhase.java | 178 +
.../hadoop/push/SegmentPushPhaseConstants.java | 40 +
.../pinot/thirdeye/hadoop/push/SegmentPushUDF.java | 25 +
.../creation/SegmentCreationPhaseConstants.java | 41 +
.../segment/creation/SegmentCreationPhaseJob.java | 223 ++
.../creation/SegmentCreationPhaseMapReduceJob.java | 320 ++
.../hadoop/topk/DimensionValueMetricPair.java | 60 +
.../thirdeye/hadoop/topk/TopKDimensionValues.java | 75 +
.../thirdeye/hadoop/topk/TopKPhaseConfig.java | 159 +
.../thirdeye/hadoop/topk/TopKPhaseConstants.java | 36 +
.../pinot/thirdeye/hadoop/topk/TopKPhaseJob.java | 463 +++
.../hadoop/topk/TopKPhaseMapOutputKey.java | 112 +
.../hadoop/topk/TopKPhaseMapOutputValue.java | 93 +
.../transform/DefaultTransformConfigUDF.java | 31 +
.../hadoop/transform/DefaultTransformUDF.java | 44 +
.../transform/DelegatingAvroKeyInputFormat.java | 77 +
.../hadoop/transform/TransformConfigUDF.java | 31 +
.../hadoop/transform/TransformPhaseJob.java | 289 ++
.../transform/TransformPhaseJobConstants.java | 39 +
.../thirdeye/hadoop/transform/TransformUDF.java | 38 +
.../hadoop/util/ThirdeyeAggregateMetricUtils.java | 58 +
.../thirdeye/hadoop/util/ThirdeyeAvroUtils.java | 267 ++
.../hadoop/util/ThirdeyePinotSchemaUtils.java | 127 +
.../pinot/thirdeye/hadoop/wait/DefaultWaitUDF.java | 44 +
.../pinot/thirdeye/hadoop/wait/WaitPhaseJob.java | 68 +
.../hadoop/wait/WaitPhaseJobConstants.java | 34 +
.../apache/pinot/thirdeye/hadoop/wait/WaitUDF.java | 39 +
.../src/main/resources/sample-join-config.yml | 6 +-
.../src/main/resources/sample-transform-config.yml | 4 +-
.../hadoop/aggregation/AggregationPhaseTest.java | 265 --
.../thirdeye/hadoop/config/ThirdEyeConfigTest.java | 249 --
.../DerivedColumnNoTransformationTest.java | 325 --
.../DerivedColumnTransformationTest.java | 323 --
.../DerivedSchemaGenerationTest.java | 74 -
.../hadoop/push/SegmentPushControllerAPIsTest.java | 64 -
.../hadoop/topk/DimensionValueMetricPairTest.java | 46 -
.../thirdeye/hadoop/topk/TopkPhaseTest.java | 238 --
.../util/ThirdeyeAggregateMetricUtilsTest.java | 84 -
.../hadoop/util/ThirdeyeAvroUtilsTest.java | 96 -
.../hadoop/util/ThirdeyePinotSchemaUtilsTest.java | 74 -
.../hadoop/aggregation/AggregationPhaseTest.java | 265 ++
.../thirdeye/hadoop/config/ThirdEyeConfigTest.java | 249 ++
.../DerivedColumnNoTransformationTest.java | 325 ++
.../DerivedColumnTransformationTest.java | 323 ++
.../DerivedSchemaGenerationTest.java | 74 +
.../hadoop/push/SegmentPushControllerAPIsTest.java | 64 +
.../hadoop/topk/DimensionValueMetricPairTest.java | 46 +
.../pinot/thirdeye/hadoop/topk/TopkPhaseTest.java | 238 ++
.../util/ThirdeyeAggregateMetricUtilsTest.java | 84 +
.../hadoop/util/ThirdeyeAvroUtilsTest.java | 96 +
.../hadoop/util/ThirdeyePinotSchemaUtilsTest.java | 74 +
thirdeye/thirdeye-pinot/config/dashboard.yml | 14 +-
.../config/data-sources/data-sources-config.yml | 2 +-
thirdeye/thirdeye-pinot/config/detector.yml | 2 +-
thirdeye/thirdeye-pinot/config/rca.yml | 28 +-
thirdeye/thirdeye-pinot/pom.xml | 17 +-
.../thirdeye/alert/commons/AnomalyFeedConfig.java | 96 -
.../thirdeye/alert/commons/AnomalyFeedFactory.java | 34 -
.../alert/commons/AnomalyFetcherConfig.java | 59 -
.../alert/commons/AnomalyFetcherFactory.java | 34 -
.../alert/commons/AnomalyNotifiedStatus.java | 48 -
.../thirdeye/alert/commons/AnomalySource.java | 50 -
.../commons/EmailContentFormatterFactory.java | 34 -
.../thirdeye/alert/commons/EmailEntity.java | 74 -
.../alert/content/BaseEmailContentFormatter.java | 885 ----
.../alert/content/EmailContentFormatter.java | 62 -
.../EmailContentFormatterConfiguration.java | 128 -
.../content/EmailContentFormatterContext.java | 64 -
...HierarchicalAnomaliesEmailContentFormatter.java | 202 -
.../MultipleAnomaliesEmailContentFormatter.java | 195 -
...nboardingNotificationEmailContentFormatter.java | 106 -
.../linkedin/thirdeye/alert/feed/AnomalyFeed.java | 50 -
.../thirdeye/alert/feed/UnionAnomalyFeed.java | 136 -
.../thirdeye/alert/fetcher/AnomalyFetcher.java | 43 -
.../thirdeye/alert/fetcher/BaseAnomalyFetcher.java | 69 -
.../alert/fetcher/ContinuumAnomalyFetcher.java | 110 -
.../alert/fetcher/UnnotifiedAnomalyFetcher.java | 107 -
.../anomaly/HolidayEventsLoaderConfiguration.java | 94 -
.../thirdeye/anomaly/SmtpConfiguration.java | 109 -
.../anomaly/ThirdEyeAnomalyApplication.java | 253 --
.../anomaly/ThirdEyeAnomalyConfiguration.java | 214 -
.../thirdeye/anomaly/alert/AlertJobContext.java | 48 -
.../thirdeye/anomaly/alert/AlertJobResource.java | 89 -
.../thirdeye/anomaly/alert/AlertTaskInfo.java | 113 -
.../anomaly/alert/grouping/AlertGroupKey.java | 109 -
.../anomaly/alert/grouping/AlertGrouper.java | 43 -
.../alert/grouping/AlertGrouperFactory.java | 69 -
.../anomaly/alert/grouping/BaseAlertGrouper.java | 33 -
.../alert/grouping/DimensionalAlertGrouper.java | 142 -
.../anomaly/alert/grouping/DummyAlertGrouper.java | 43 -
.../HorizontalDimensionalAlertGrouper.java | 100 -
.../alert/grouping/SimpleGroupedAnomalyMerger.java | 65 -
.../AlertGroupAuxiliaryInfoProvider.java | 49 -
.../AlertGroupRecipientProviderFactory.java | 165 -
.../AuxiliaryAlertGroupInfo.java | 96 -
.../BaseAlertGroupAuxiliaryInfoProvider.java | 35 -
...sionalAlertGroupAuxiliaryRecipientProvider.java | 77 -
.../DummyAlertGroupAuxiliaryInfoProvider.java | 31 -
.../alert/grouping/filter/AlertGroupFilter.java | 46 -
.../grouping/filter/AlertGroupFilterFactory.java | 67 -
.../grouping/filter/BaseAlertGroupFilter.java | 33 -
.../grouping/filter/DummyAlertGroupFilter.java | 34 -
.../filter/SizeSeverityAlertGroupFilter.java | 100 -
.../alert/template/pojo/MetricDimensionReport.java | 134 -
.../anomaly/alert/util/AlertFilterHelper.java | 79 -
.../anomaly/alert/util/AnomalyReportGenerator.java | 599 ---
.../anomaly/alert/util/DataReportHelper.java | 276 --
.../thirdeye/anomaly/alert/util/EmailHelper.java | 285 --
.../anomaly/alert/util/EmailScreenshotHelper.java | 120 -
.../anomaly/alert/v2/AlertJobRunnerV2.java | 157 -
.../anomaly/alert/v2/AlertJobSchedulerV2.java | 249 --
.../anomaly/alert/v2/AlertTaskRunnerV2.java | 637 ---
.../classification/ClassificationJobConfig.java | 47 -
.../classification/ClassificationJobContext.java | 70 -
.../classification/ClassificationJobResource.java | 23 -
.../classification/ClassificationJobRunner.java | 126 -
.../classification/ClassificationJobScheduler.java | 186 -
.../classification/ClassificationTaskInfo.java | 73 -
.../classification/ClassificationTaskRunner.java | 438 --
.../classifier/AnomalyClassifier.java | 45 -
.../classifier/AnomalyClassifierFactory.java | 152 -
.../classifier/BaseAnomalyClassifier.java | 32 -
.../classifier/DummyAnomalyClassifier.java | 36 -
.../detection/AnomalyDetectionInputContext.java | 71 -
.../AnomalyDetectionInputContextBuilder.java | 704 ----
.../detection/AnomalyDetectionOutputContext.java | 37 -
.../anomaly/detection/DetectionJobContext.java | 80 -
.../anomaly/detection/DetectionJobRunner.java | 182 -
.../anomaly/detection/DetectionJobScheduler.java | 613 ---
.../detection/DetectionJobSchedulerUtils.java | 232 --
.../anomaly/detection/DetectionTaskInfo.java | 138 -
.../anomaly/detection/DetectionTaskRunner.java | 460 ---
.../anomaly/detection/lib/AutotuneMethodType.java | 24 -
.../detection/lib/FunctionReplayRunnable.java | 295 --
.../thirdeye/anomaly/events/EventDataProvider.java | 28 -
.../thirdeye/anomaly/events/EventFilter.java | 182 -
.../thirdeye/anomaly/events/EventType.java | 24 -
.../events/HistoricalAnomalyEventProvider.java | 87 -
.../anomaly/events/HolidayEventProvider.java | 53 -
.../anomaly/events/HolidayEventResource.java | 60 -
.../anomaly/events/HolidayEventsLoader.java | 424 --
.../thirdeye/anomaly/job/JobConstants.java | 33 -
.../linkedin/thirdeye/anomaly/job/JobContext.java | 97 -
.../linkedin/thirdeye/anomaly/job/JobRunner.java | 31 -
.../thirdeye/anomaly/job/JobScheduler.java | 39 -
.../thirdeye/anomaly/merge/AnomalyMergeConfig.java | 92 -
.../anomaly/merge/AnomalyMergeStrategy.java | 25 -
.../anomaly/merge/AnomalyTimeBasedSummarizer.java | 171 -
.../anomaly/merge/TimeBasedAnomalyMerger.java | 314 --
.../anomaly/monitor/MonitorConfiguration.java | 71 -
.../thirdeye/anomaly/monitor/MonitorConstants.java | 38 -
.../anomaly/monitor/MonitorJobContext.java | 38 -
.../thirdeye/anomaly/monitor/MonitorJobRunner.java | 128 -
.../anomaly/monitor/MonitorJobScheduler.java | 72 -
.../thirdeye/anomaly/monitor/MonitorTaskInfo.java | 111 -
.../anomaly/monitor/MonitorTaskRunner.java | 220 -
.../anomaly/onboard/DetectionOnboardResource.java | 184 -
.../thirdeye/anomaly/onboard/ReplayTaskInfo.java | 45 -
.../thirdeye/anomaly/onboard/ReplayTaskRunner.java | 146 -
.../onboard/framework/BaseDetectionOnboardJob.java | 43 -
.../framework/BaseDetectionOnboardTask.java | 50 -
.../framework/DetectionOnBoardJobRunner.java | 184 -
.../DetectionOnboardExecutionContext.java | 54 -
.../onboard/framework/DetectionOnboardJob.java | 50 -
.../framework/DetectionOnboardJobContext.java | 113 -
.../framework/DetectionOnboardJobStatus.java | 93 -
.../onboard/framework/DetectionOnboardTask.java | 50 -
.../framework/DetectionOnboardTaskContext.java | 51 -
.../framework/DetectionOnboardTaskRunner.java | 55 -
.../framework/DetectionOnboardTaskStatus.java | 69 -
.../tasks/AlertFilterAutoTuneOnboardingTask.java | 127 -
.../tasks/DataPreparationOnboardingTask.java | 79 -
.../onboard/tasks/DefaultDetectionOnboardJob.java | 322 --
.../tasks/FunctionCreationOnboardingTask.java | 365 --
.../tasks/FunctionReplayOnboardingTask.java | 129 -
.../onboard/tasks/NotificationOnboardingTask.java | 164 -
.../onboard/utils/FunctionCreationUtils.java | 52 -
.../anomaly/onboard/utils/PropertyCheckUtils.java | 50 -
.../anomaly/override/OverrideConfigHelper.java | 228 --
.../thirdeye/anomaly/task/TaskConstants.java | 45 -
.../thirdeye/anomaly/task/TaskContext.java | 64 -
.../linkedin/thirdeye/anomaly/task/TaskDriver.java | 236 --
.../anomaly/task/TaskDriverConfiguration.java | 68 -
.../thirdeye/anomaly/task/TaskGenerator.java | 127 -
.../linkedin/thirdeye/anomaly/task/TaskInfo.java | 27 -
.../thirdeye/anomaly/task/TaskInfoFactory.java | 87 -
.../linkedin/thirdeye/anomaly/task/TaskResult.java | 24 -
.../linkedin/thirdeye/anomaly/task/TaskRunner.java | 32 -
.../thirdeye/anomaly/task/TaskRunnerFactory.java | 71 -
.../anomaly/utils/AbstractResourceHttpUtils.java | 84 -
.../anomaly/utils/AlertResourceHttpUtils.java | 60 -
.../thirdeye/anomaly/utils/AnomalyUtils.java | 164 -
.../anomaly/utils/DetectionResourceHttpUtils.java | 144 -
.../thirdeye/anomaly/utils/EmailUtils.java | 80 -
.../anomaly/utils/OnboardResourceHttpUtils.java | 54 -
.../anomaly/utils/ThirdeyeMetricsUtil.java | 156 -
.../anomaly/views/AnomalyTimelinesView.java | 113 -
.../views/CondensedAnomalyTimelinesView.java | 268 --
.../anomalydetection/AnomalyDetectionUtils.java | 76 -
.../alertFilterAutotune/AlertFilterAutoTune.java | 35 -
.../AlertFilterAutotuneFactory.java | 95 -
.../BaseAlertFilterAutoTune.java | 78 -
.../DummyAlertFilterAutoTune.java | 35 -
.../context/AnomalyDetectionContext.java | 186 -
.../anomalydetection/context/AnomalyFeedback.java | 48 -
.../anomalydetection/context/AnomalyResult.java | 133 -
.../anomalydetection/context/MetricTimeSeries.java | 63 -
.../anomalydetection/context/RawAnomalyResult.java | 157 -
.../anomalydetection/context/TimeSeries.java | 167 -
.../anomalydetection/context/TimeSeriesKey.java | 61 -
.../datafilter/AverageThresholdDataFilter.java | 241 --
.../datafilter/BaseDataFilter.java | 32 -
.../anomalydetection/datafilter/DataFilter.java | 57 -
.../datafilter/DataFilterFactory.java | 67 -
.../datafilter/DummyDataFilter.java | 41 -
.../AbstractModularizedAnomalyFunction.java | 369 --
.../function/AnomalyDetectionFunction.java | 93 -
.../function/BackwardAnomalyFunctionUtils.java | 146 -
.../function/MinMaxThresholdFunction.java | 104 -
.../ModularizedAnomalyFunctionModelProvider.java | 66 -
.../function/WeekOverWeekRuleFunction.java | 176 -
.../model/data/AbstractDataModel.java | 36 -
.../anomalydetection/model/data/DataModel.java | 57 -
.../anomalydetection/model/data/NoopDataModel.java | 40 -
.../model/data/SeasonalDataModel.java | 82 -
.../model/detection/AbstractDetectionModel.java | 36 -
.../model/detection/DetectionModel.java | 49 -
.../detection/MinMaxThresholdDetectionModel.java | 99 -
.../model/detection/NoopDetectionModel.java | 32 -
.../detection/SimpleThresholdDetectionModel.java | 124 -
.../model/merge/AbstractMergeModel.java | 36 -
.../anomalydetection/model/merge/MergeModel.java | 48 -
.../model/merge/MinMaxThresholdMergeModel.java | 93 -
.../model/merge/NoPredictionMergeModel.java | 27 -
.../model/merge/NoopMergeModel.java | 31 -
.../model/merge/SimplePercentageMergeModel.java | 106 -
.../model/prediction/AbstractPredictionModel.java | 36 -
.../ExpectedTimeSeriesPredictionModel.java | 31 -
.../model/prediction/NoopPredictionModel.java | 31 -
.../model/prediction/PredictionModel.java | 47 -
.../prediction/SeasonalAveragePredictionModel.java | 109 -
.../transform/AbstractTransformationFunction.java | 36 -
.../model/transform/AnomalyRemovalFunction.java | 206 -
.../transform/MovingAverageSmoothingFunction.java | 98 -
.../TotalCountThresholdRemovalFunction.java | 70 -
.../model/transform/TransformationFunction.java | 54 -
.../model/transform/ZeroRemovalFunction.java | 53 -
.../AnomalyPercentagePerformanceEvaluation.java | 57 -
.../BasePerformanceEvaluate.java | 61 -
.../F1ScoreByTimePerformanceEvaluation.java | 46 -
.../performanceEvaluation/PerformanceEvaluate.java | 24 -
.../PerformanceEvaluateHelper.java | 69 -
.../PerformanceEvaluationMethod.java | 24 -
.../PrecisionByTimePerformanceEvaluation.java | 85 -
.../RecallByTimePreformanceEvaluation.java | 91 -
.../java/com/linkedin/thirdeye/api/Constants.java | 34 -
.../com/linkedin/thirdeye/api/DimensionKey.java | 206 -
.../com/linkedin/thirdeye/api/DimensionMap.java | 330 --
.../com/linkedin/thirdeye/api/DimensionSpec.java | 75 -
.../com/linkedin/thirdeye/api/DimensionType.java | 25 -
.../com/linkedin/thirdeye/api/MetricSchema.java | 122 -
.../java/com/linkedin/thirdeye/api/MetricSpec.java | 62 -
.../linkedin/thirdeye/api/MetricTimeSeries.java | 386 --
.../java/com/linkedin/thirdeye/api/MetricType.java | 89 -
.../com/linkedin/thirdeye/api/TimeGranularity.java | 205 -
.../java/com/linkedin/thirdeye/api/TimeRange.java | 92 -
.../java/com/linkedin/thirdeye/api/TimeSpec.java | 58 -
.../com/linkedin/thirdeye/auth/Credentials.java | 61 -
.../linkedin/thirdeye/auth/ThirdEyeAuthFilter.java | 134 -
.../auth/ThirdEyeAuthenticatorDisabled.java | 48 -
.../thirdeye/auth/ThirdEyeAuthenticatorLdap.java | 214 -
.../linkedin/thirdeye/auth/ThirdEyePrincipal.java | 56 -
.../thirdeye/auto/onboard/AutoOnboard.java | 48 -
.../auto/onboard/AutoOnboardConfiguration.java | 39 -
.../onboard/AutoOnboardPinotMetadataSource.java | 435 --
.../auto/onboard/AutoOnboardPinotMetricsUtils.java | 229 --
.../thirdeye/auto/onboard/AutoOnboardService.java | 80 -
.../thirdeye/auto/onboard/AutoOnboardUtility.java | 88 -
.../thirdeye/auto/onboard/ConfigGenerator.java | 118 -
.../linkedin/thirdeye/client/diffsummary/Cube.java | 570 ---
.../client/diffsummary/DimNameValueCostEntry.java | 108 -
.../client/diffsummary/DimensionValues.java | 110 -
.../thirdeye/client/diffsummary/Dimensions.java | 121 -
.../thirdeye/client/diffsummary/HierarchyNode.java | 296 --
.../diffsummary/MultiDimensionalSummary.java | 115 -
.../MultiDimensionalSummaryCLITool.java | 257 --
.../client/diffsummary/OLAPDataBaseClient.java | 50 -
.../diffsummary/PinotThirdEyeSummaryClient.java | 278 --
.../linkedin/thirdeye/client/diffsummary/Row.java | 105 -
.../costfunctions/BalancedCostFunction.java | 138 -
.../costfunctions/ChangeRatioCostFunction.java | 58 -
.../ContributionToOverallChangeCostFunction.java | 61 -
.../diffsummary/costfunctions/CostFunction.java | 25 -
.../com/linkedin/thirdeye/common/BaseFactory.java | 32 -
.../thirdeye/common/BaseThirdEyeApplication.java | 84 -
.../thirdeye/common/ThirdEyeConfiguration.java | 181 -
.../thirdeye/common/ThirdEyeSwaggerBundle.java | 35 -
.../checker/DataCompletenessAlgorithm.java | 94 -
.../checker/DataCompletenessAlgorithmFactory.java | 39 -
.../checker/DataCompletenessConstants.java | 41 -
.../checker/DataCompletenessJobContext.java | 55 -
.../checker/DataCompletenessJobRunner.java | 170 -
.../checker/DataCompletenessScheduler.java | 53 -
.../checker/DataCompletenessTaskInfo.java | 101 -
.../checker/DataCompletenessTaskRunner.java | 348 --
.../checker/DataCompletenessUtils.java | 273 --
.../checker/PercentCompletenessFunctionInput.java | 88 -
.../checker/Wo4WAvgDataCompletenessAlgorithm.java | 164 -
.../linkedin/thirdeye/config/ConfigNamespace.java | 122 -
.../thirdeye/constant/AnomalyFeedbackType.java | 50 -
.../thirdeye/constant/AnomalyResultSource.java | 26 -
.../thirdeye/constant/MetricAggFunction.java | 30 -
.../thirdeye/dashboard/DetectorHttpUtils.java | 78 -
.../thirdeye/dashboard/HandlebarsHelperBundle.java | 87 -
.../thirdeye/dashboard/HandlebarsViewRenderer.java | 109 -
.../linkedin/thirdeye/dashboard/HelperBundle.java | 31 -
.../thirdeye/dashboard/RootCauseConfiguration.java | 55 -
.../dashboard/ThirdEyeDashboardApplication.java | 333 --
.../dashboard/ThirdEyeDashboardConfiguration.java | 57 -
.../com/linkedin/thirdeye/dashboard/Utils.java | 246 --
.../com/linkedin/thirdeye/dashboard/ViewType.java | 27 -
.../dashboard/configs/AuthConfiguration.java | 117 -
.../dashboard/configs/ResourceConfiguration.java | 37 -
.../dashboard/resources/AdminResource.java | 46 -
.../dashboard/resources/AnomalyResource.java | 946 -----
.../dashboard/resources/AutoOnboardResource.java | 74 -
.../dashboard/resources/CacheResource.java | 156 -
.../resources/CustomizedEventResource.java | 82 -
.../dashboard/resources/DashboardResource.java | 245 --
.../dashboard/resources/DatasetConfigResource.java | 118 -
.../dashboard/resources/DetectionJobResource.java | 1308 ------
.../dashboard/resources/EmailResource.java | 598 ---
.../dashboard/resources/EntityManagerResource.java | 266 --
.../dashboard/resources/EntityMappingResource.java | 246 --
.../dashboard/resources/MetricConfigResource.java | 297 --
.../resources/OnboardDatasetMetricResource.java | 118 -
.../dashboard/resources/OnboardResource.java | 680 ----
.../dashboard/resources/SummaryResource.java | 182 -
.../dashboard/resources/ThirdEyeResource.java | 42 -
.../dashboard/resources/v2/AnomaliesResource.java | 1322 ------
.../dashboard/resources/v2/AuthResource.java | 155 -
.../resources/v2/BaselineParsingUtils.java | 194 -
.../dashboard/resources/v2/ConfigResource.java | 113 -
.../dashboard/resources/v2/DataResource.java | 578 ---
.../resources/v2/DetectionAlertResource.java | 105 -
.../dashboard/resources/v2/ResourceUtils.java | 372 --
.../resources/v2/RootCauseEntityFormatter.java | 44 -
.../v2/RootCauseEventEntityFormatter.java | 63 -
.../resources/v2/RootCauseMetricResource.java | 673 ----
.../dashboard/resources/v2/RootCauseResource.java | 213 -
.../resources/v2/RootCauseSessionResource.java | 260 --
.../dashboard/resources/v2/TimeSeriesResource.java | 435 --
.../resources/v2/UserDashboardResource.java | 424 --
.../resources/v2/pojo/AnomaliesSummary.java | 75 -
.../resources/v2/pojo/AnomaliesWrapper.java | 74 -
.../v2/pojo/AnomalyClassificationType.java | 28 -
.../resources/v2/pojo/AnomalyDataCompare.java | 97 -
.../resources/v2/pojo/AnomalyDetails.java | 294 --
.../resources/v2/pojo/AnomalySummary.java | 197 -
.../dashboard/resources/v2/pojo/MetricSummary.java | 108 -
.../resources/v2/pojo/RootCauseEntity.java | 114 -
.../resources/v2/pojo/RootCauseEventEntity.java | 64 -
.../dashboard/resources/v2/pojo/SearchFilters.java | 302 --
.../v2/pojo/TimeSeriesCompareMetricView.java | 127 -
.../resources/v2/pojo/ValuesContainer.java | 79 -
.../dashboard/resources/v2/pojo/WowSummary.java | 37 -
.../v2/rootcause/AnomalyEventFormatter.java | 216 -
.../v2/rootcause/DefaultEntityFormatter.java | 43 -
.../v2/rootcause/DefaultEventEntityFormatter.java | 39 -
.../v2/rootcause/DimensionEntityFormatter.java | 42 -
.../resources/v2/rootcause/FormatterLoader.java | 29 -
.../resources/v2/rootcause/HyperlinkFormatter.java | 41 -
.../v2/rootcause/MetricEntityFormatter.java | 168 -
.../v2/rootcause/ServiceEntityFormatter.java | 41 -
.../v2/rootcause/ThirdEyeEventFormatter.java | 69 -
.../dashboard/views/CompareViewRequest.java | 128 -
.../thirdeye/dashboard/views/DashboardView.java | 31 -
.../thirdeye/dashboard/views/DefaultView.java | 30 -
.../thirdeye/dashboard/views/GenericResponse.java | 164 -
.../dashboard/views/ThirdEyeAdminView.java | 30 -
.../thirdeye/dashboard/views/ThirdEyeView.java | 30 -
.../thirdeye/dashboard/views/TimeBucket.java | 115 -
.../thirdeye/dashboard/views/ViewHandler.java | 26 -
.../thirdeye/dashboard/views/ViewRequest.java | 33 -
.../thirdeye/dashboard/views/ViewResponse.java | 24 -
.../views/contributor/ContributionCell.java | 183 -
.../views/contributor/ContributionViewTable.java | 48 -
.../contributor/ContributionViewTableBuilder.java | 142 -
.../views/contributor/ContributorViewHandler.java | 331 --
.../views/contributor/ContributorViewRequest.java | 27 -
.../views/contributor/ContributorViewResponse.java | 109 -
.../views/diffsummary/BaseResponseRow.java | 28 -
.../dashboard/views/diffsummary/DPArray.java | 101 -
.../dashboard/views/diffsummary/Summary.java | 443 --
.../diffsummary/SummaryGainerLoserResponseRow.java | 38 -
.../views/diffsummary/SummaryResponse.java | 296 --
.../views/diffsummary/SummaryResponseRow.java | 50 -
.../views/diffsummary/SummaryResponseTree.java | 243 --
.../thirdeye/dashboard/views/heatmap/HeatMap.java | 155 -
.../dashboard/views/heatmap/HeatMapCell.java | 146 -
.../views/heatmap/HeatMapViewHandler.java | 335 --
.../views/heatmap/HeatMapViewRequest.java | 26 -
.../views/heatmap/HeatMapViewResponse.java | 253 --
.../views/tabular/TabularViewHandler.java | 268 --
.../views/tabular/TabularViewRequest.java | 28 -
.../views/tabular/TabularViewResponse.java | 71 -
.../linkedin/thirdeye/dataframe/BooleanSeries.java | 961 -----
.../com/linkedin/thirdeye/dataframe/DataFrame.java | 2658 ------------
.../linkedin/thirdeye/dataframe/DoubleSeries.java | 1207 ------
.../com/linkedin/thirdeye/dataframe/Grouping.java | 1193 ------
.../linkedin/thirdeye/dataframe/LongSeries.java | 917 -----
.../linkedin/thirdeye/dataframe/ObjectSeries.java | 855 ----
.../thirdeye/dataframe/PrimitiveMultimap.java | 245 --
.../com/linkedin/thirdeye/dataframe/Series.java | 1504 -------
.../linkedin/thirdeye/dataframe/StringSeries.java | 763 ----
.../linkedin/thirdeye/dataframe/TypedSeries.java | 98 -
.../dataframe/util/DataFrameSerializer.java | 87 -
.../thirdeye/dataframe/util/DataFrameUtils.java | 624 ---
.../thirdeye/dataframe/util/MetricSlice.java | 122 -
.../thirdeye/dataframe/util/RequestContainer.java | 46 -
.../dataframe/util/TimeSeriesRequestContainer.java | 53 -
.../linkedin/thirdeye/datalayer/ScriptRunner.java | 179 -
.../thirdeye/datalayer/bao/AbstractManager.java | 60 -
.../thirdeye/datalayer/bao/AlertConfigManager.java | 31 -
.../datalayer/bao/AlertSnapshotManager.java | 26 -
.../datalayer/bao/AnomalyFunctionManager.java | 46 -
.../thirdeye/datalayer/bao/ApplicationManager.java | 28 -
.../datalayer/bao/AutotuneConfigManager.java | 36 -
.../datalayer/bao/ClassificationConfigManager.java | 29 -
.../thirdeye/datalayer/bao/ConfigManager.java | 30 -
.../bao/DataCompletenessConfigManager.java | 39 -
.../datalayer/bao/DatasetConfigManager.java | 32 -
.../datalayer/bao/DetectionAlertConfigManager.java | 26 -
.../datalayer/bao/DetectionConfigManager.java | 27 -
.../datalayer/bao/DetectionStatusManager.java | 34 -
.../bao/EntityToEntityMappingManager.java | 36 -
.../thirdeye/datalayer/bao/EventManager.java | 29 -
.../bao/GroupedAnomalyResultsManager.java | 37 -
.../thirdeye/datalayer/bao/JobManager.java | 46 -
.../datalayer/bao/MergedAnomalyResultManager.java | 81 -
.../datalayer/bao/MetricConfigManager.java | 38 -
.../datalayer/bao/OnboardDatasetMetricManager.java | 38 -
.../datalayer/bao/OverrideConfigManager.java | 31 -
.../datalayer/bao/RawAnomalyResultManager.java | 26 -
.../datalayer/bao/RootcauseSessionManager.java | 36 -
.../thirdeye/datalayer/bao/SessionManager.java | 29 -
.../thirdeye/datalayer/bao/TaskManager.java | 51 -
.../datalayer/bao/jdbc/AbstractManagerImpl.java | 201 -
.../datalayer/bao/jdbc/AlertConfigManagerImpl.java | 100 -
.../bao/jdbc/AlertSnapshotManagerImpl.java | 33 -
.../bao/jdbc/AnomalyFunctionManagerImpl.java | 133 -
.../datalayer/bao/jdbc/ApplicationManagerImpl.java | 47 -
.../bao/jdbc/AutotuneConfigManagerImpl.java | 76 -
.../bao/jdbc/ClassificationConfigManagerImpl.java | 55 -
.../datalayer/bao/jdbc/ConfigManagerImpl.java | 63 -
.../jdbc/DataCompletenessConfigManagerImpl.java | 119 -
.../bao/jdbc/DatasetConfigManagerImpl.java | 63 -
.../bao/jdbc/DetectionAlertConfigManagerImpl.java | 33 -
.../bao/jdbc/DetectionConfigManagerImpl.java | 70 -
.../bao/jdbc/DetectionStatusManagerImpl.java | 67 -
.../bao/jdbc/EntityToEntityMappingManagerImpl.java | 82 -
.../datalayer/bao/jdbc/EventManagerImpl.java | 54 -
.../bao/jdbc/GroupedAnomalyResultsManagerImpl.java | 154 -
.../datalayer/bao/jdbc/JobManagerImpl.java | 162 -
.../bao/jdbc/MergedAnomalyResultManagerImpl.java | 505 ---
.../bao/jdbc/MetricConfigManagerImpl.java | 130 -
.../bao/jdbc/OnboardDatasetMetricManagerImpl.java | 102 -
.../bao/jdbc/OverrideConfigManagerImpl.java | 54 -
.../bao/jdbc/RawAnomalyResultManagerImpl.java | 116 -
.../bao/jdbc/RootcauseSessionManagerImpl.java | 116 -
.../datalayer/bao/jdbc/SessionManagerImpl.java | 44 -
.../datalayer/bao/jdbc/TaskManagerImpl.java | 190 -
.../thirdeye/datalayer/dao/GenericPojoDao.java | 831 ----
.../thirdeye/datalayer/dto/AbstractDTO.java | 67 -
.../thirdeye/datalayer/dto/AlertConfigDTO.java | 26 -
.../thirdeye/datalayer/dto/AlertSnapshotDTO.java | 146 -
.../thirdeye/datalayer/dto/AnomalyFeedbackDTO.java | 47 -
.../thirdeye/datalayer/dto/AnomalyFunctionDTO.java | 108 -
.../thirdeye/datalayer/dto/ApplicationDTO.java | 25 -
.../thirdeye/datalayer/dto/AutotuneConfigDTO.java | 78 -
.../datalayer/dto/ClassificationConfigDTO.java | 25 -
.../linkedin/thirdeye/datalayer/dto/ConfigDTO.java | 27 -
.../datalayer/dto/DataCompletenessConfigDTO.java | 26 -
.../thirdeye/datalayer/dto/DatasetConfigDTO.java | 49 -
.../datalayer/dto/DetectionAlertConfigDTO.java | 27 -
.../thirdeye/datalayer/dto/DetectionConfigDTO.java | 38 -
.../thirdeye/datalayer/dto/DetectionStatusDTO.java | 26 -
.../datalayer/dto/EntityToEntityMappingDTO.java | 26 -
.../linkedin/thirdeye/datalayer/dto/EventDTO.java | 26 -
.../datalayer/dto/GroupedAnomalyResultsDTO.java | 57 -
.../linkedin/thirdeye/datalayer/dto/JobDTO.java | 32 -
.../datalayer/dto/MergedAnomalyResultDTO.java | 114 -
.../thirdeye/datalayer/dto/MetricConfigDTO.java | 37 -
.../datalayer/dto/OnboardDatasetMetricDTO.java | 26 -
.../thirdeye/datalayer/dto/OverrideConfigDTO.java | 29 -
.../datalayer/dto/RawAnomalyResultDTO.java | 63 -
.../datalayer/dto/RootcauseSessionDTO.java | 27 -
.../thirdeye/datalayer/dto/SessionDTO.java | 28 -
.../linkedin/thirdeye/datalayer/dto/TaskDTO.java | 33 -
.../thirdeye/datalayer/entity/AbstractEntity.java | 101 -
.../datalayer/entity/AbstractIndexEntity.java | 33 -
.../datalayer/entity/AbstractJsonEntity.java | 44 -
.../datalayer/entity/AbstractMappingEntity.java | 36 -
.../datalayer/entity/AlertConfigIndex.java | 50 -
.../datalayer/entity/AlertSnapshotIndex.java | 24 -
.../datalayer/entity/AnomalyFeedbackIndex.java | 32 -
.../datalayer/entity/AnomalyFunctionIndex.java | 68 -
.../datalayer/entity/ApplicationIndex.java | 42 -
.../datalayer/entity/AutotuneConfigIndex.java | 77 -
.../entity/ClassificationConfigIndex.java | 41 -
.../thirdeye/datalayer/entity/ConfigIndex.java | 41 -
.../entity/DataCompletenessConfigIndex.java | 69 -
.../datalayer/entity/DatasetConfigIndex.java | 46 -
.../entity/DetectionAlertConfigIndex.java | 43 -
.../datalayer/entity/DetectionConfigIndex.java | 32 -
.../datalayer/entity/DetectionStatusIndex.java | 79 -
.../entity/EntityToEntityMappingIndex.java | 47 -
.../thirdeye/datalayer/entity/EventIndex.java | 79 -
.../datalayer/entity/GenericJsonEntity.java | 24 -
.../entity/GroupedAnomalyResultsIndex.java | 52 -
.../thirdeye/datalayer/entity/JobIndex.java | 82 -
.../datalayer/entity/MergedAnomalyResultIndex.java | 118 -
.../datalayer/entity/MetricConfigIndex.java | 59 -
.../entity/OnboardDatasetMetricIndex.java | 53 -
.../datalayer/entity/OverrideConfigIndex.java | 59 -
.../datalayer/entity/RawAnomalyResultIndex.java | 98 -
.../datalayer/entity/RootcauseSessionIndex.java | 95 -
.../thirdeye/datalayer/entity/SessionIndex.java | 44 -
.../thirdeye/datalayer/entity/TaskIndex.java | 95 -
.../thirdeye/datalayer/pojo/AbstractBean.java | 29 -
.../thirdeye/datalayer/pojo/AlertConfigBean.java | 404 --
.../thirdeye/datalayer/pojo/AlertSnapshotBean.java | 64 -
.../datalayer/pojo/AnomalyFeedbackBean.java | 67 -
.../datalayer/pojo/AnomalyFunctionBean.java | 375 --
.../thirdeye/datalayer/pojo/ApplicationBean.java | 44 -
.../datalayer/pojo/AutotuneConfigBean.java | 174 -
.../datalayer/pojo/ClassificationConfigBean.java | 141 -
.../thirdeye/datalayer/pojo/ConfigBean.java | 82 -
.../datalayer/pojo/DataCompletenessConfigBean.java | 131 -
.../thirdeye/datalayer/pojo/DatasetConfigBean.java | 327 --
.../datalayer/pojo/DetectionAlertConfigBean.java | 191 -
.../datalayer/pojo/DetectionConfigBean.java | 118 -
.../datalayer/pojo/DetectionStatusBean.java | 99 -
.../datalayer/pojo/EntityToEntityMappingBean.java | 80 -
.../thirdeye/datalayer/pojo/EventBean.java | 120 -
.../datalayer/pojo/GroupedAnomalyResultsBean.java | 73 -
.../linkedin/thirdeye/datalayer/pojo/JobBean.java | 133 -
.../datalayer/pojo/MergedAnomalyResultBean.java | 280 --
.../thirdeye/datalayer/pojo/MetricConfigBean.java | 277 --
.../datalayer/pojo/OnboardDatasetMetricBean.java | 106 -
.../datalayer/pojo/OverrideConfigBean.java | 112 -
.../datalayer/pojo/RawAnomalyResultBean.java | 223 --
.../datalayer/pojo/RootcauseSessionBean.java | 217 -
.../thirdeye/datalayer/pojo/SessionBean.java | 96 -
.../linkedin/thirdeye/datalayer/pojo/TaskBean.java | 144 -
.../linkedin/thirdeye/datalayer/util/BeanInfo.java | 33 -
.../thirdeye/datalayer/util/DaoProviderUtil.java | 241 --
.../datalayer/util/EntityMappingHolder.java | 123 -
.../datalayer/util/GenericResultSetMapper.java | 216 -
.../thirdeye/datalayer/util/IndexInfo.java | 31 -
.../thirdeye/datalayer/util/ManagerProvider.java | 51 -
.../thirdeye/datalayer/util/PersistenceConfig.java | 95 -
.../thirdeye/datalayer/util/Predicate.java | 117 -
.../thirdeye/datalayer/util/SqlQueryBuilder.java | 503 ---
.../thirdeye/datalayer/util/StringUtils.java | 70 -
.../dataset/DatasetAutoOnboardResource.java | 63 -
.../thirdeye/datasource/BaseThirdEyeResponse.java | 93 -
.../linkedin/thirdeye/datasource/DAORegistry.java | 192 -
.../thirdeye/datasource/DataSourceConfig.java | 68 -
.../linkedin/thirdeye/datasource/DataSources.java | 47 -
.../thirdeye/datasource/DataSourcesLoader.java | 85 -
.../thirdeye/datasource/MetadataSourceConfig.java | 65 -
.../thirdeye/datasource/MetricExpression.java | 165 -
.../thirdeye/datasource/MetricFunction.java | 136 -
.../thirdeye/datasource/ResponseParserUtils.java | 164 -
.../thirdeye/datasource/ThirdEyeCacheRegistry.java | 180 -
.../thirdeye/datasource/ThirdEyeDataSource.java | 57 -
.../thirdeye/datasource/ThirdEyeRequest.java | 284 --
.../thirdeye/datasource/ThirdEyeResponse.java | 50 -
.../thirdeye/datasource/ThirdEyeResponseRow.java | 49 -
.../thirdeye/datasource/TimeRangeUtils.java | 150 -
.../datasource/cache/DatasetConfigCacheLoader.java | 45 -
.../datasource/cache/DatasetListCache.java | 72 -
.../cache/DatasetMaxDataTimeCacheLoader.java | 91 -
.../cache/DimensionFiltersCacheLoader.java | 73 -
.../datasource/cache/MetricConfigCacheLoader.java | 48 -
.../thirdeye/datasource/cache/MetricDataset.java | 64 -
.../thirdeye/datasource/cache/QueryCache.java | 91 -
.../thirdeye/datasource/comparison/Row.java | 161 -
.../comparison/ThirdEyeRequestGenerator.java | 101 -
.../comparison/TimeOnTimeComparisonHandler.java | 180 -
.../comparison/TimeOnTimeComparisonRequest.java | 179 -
.../comparison/TimeOnTimeComparisonResponse.java | 77 -
.../datasource/comparison/TimeOnTimeConstants.java | 30 -
.../comparison/TimeOnTimeResponseParser.java | 375 --
.../datasource/csv/CSVThirdEyeDataSource.java | 393 --
.../datasource/csv/CSVThirdEyeResponse.java | 149 -
.../datasource/loader/AggregationLoader.java | 54 -
.../loader/DefaultAggregationLoader.java | 155 -
.../datasource/loader/DefaultTimeSeriesLoader.java | 62 -
.../datasource/loader/TimeSeriesLoader.java | 47 -
.../datasource/mock/AutoOnboardMockDataSource.java | 184 -
.../datasource/mock/MockThirdEyeDataSource.java | 444 --
.../pinot/PinotControllerResponseCacheLoader.java | 210 -
.../pinot/PinotDataSourceDimensionFilters.java | 175 -
.../datasource/pinot/PinotDataSourceMaxTime.java | 118 -
.../thirdeye/datasource/pinot/PinotQuery.java | 67 -
.../datasource/pinot/PinotResponseCacheLoader.java | 35 -
.../datasource/pinot/PinotThirdEyeDataSource.java | 599 ---
.../pinot/PinotThirdEyeDataSourceConfig.java | 327 --
.../datasource/pinot/PinotThirdEyeResponse.java | 104 -
.../pinot/PinotThirdeyeDataSourceProperties.java | 42 -
.../thirdeye/datasource/pinot/PqlUtils.java | 513 ---
.../pinot/resources/PinotDataSourceResource.java | 103 -
.../pinot/resultset/AbstractThirdEyeResultSet.java | 43 -
.../resultset/ThirdEyeDataFrameResultSet.java | 195 -
.../pinot/resultset/ThirdEyeResultSet.java | 53 -
.../resultset/ThirdEyeResultSetDeserializer.java | 75 -
.../pinot/resultset/ThirdEyeResultSetGroup.java | 89 -
.../pinot/resultset/ThirdEyeResultSetMetaData.java | 84 -
.../resultset/ThirdEyeResultSetSerializer.java | 78 -
.../AnomalyDetectionTimeSeriesResponseParser.java | 73 -
.../timeseries/BaseTimeSeriesResponseParser.java | 138 -
.../datasource/timeseries/TimeSeriesHandler.java | 211 -
.../datasource/timeseries/TimeSeriesRequest.java | 168 -
.../datasource/timeseries/TimeSeriesResponse.java | 80 -
.../timeseries/TimeSeriesResponseConverter.java | 143 -
.../timeseries/TimeSeriesResponseParser.java | 27 -
.../datasource/timeseries/TimeSeriesRow.java | 220 -
.../timeseries/UITimeSeriesResponseParser.java | 179 -
.../linkedin/thirdeye/detection/ConfigUtils.java | 246 --
.../detection/CurrentAndBaselineLoader.java | 141 -
.../linkedin/thirdeye/detection/DataProvider.java | 151 -
.../thirdeye/detection/DefaultDataProvider.java | 255 --
.../detection/DefaultInputDataFetcher.java | 90 -
.../detection/DetectionMigrationResource.java | 313 --
.../thirdeye/detection/DetectionPipeline.java | 269 --
.../thirdeye/detection/DetectionPipelineJob.java | 100 -
.../detection/DetectionPipelineLoader.java | 34 -
.../detection/DetectionPipelineResult.java | 81 -
.../detection/DetectionPipelineScheduler.java | 171 -
.../detection/DetectionPipelineTaskInfo.java | 63 -
.../detection/DetectionPipelineTaskRunner.java | 137 -
.../thirdeye/detection/DetectionResource.java | 430 --
.../thirdeye/detection/DetectionUtils.java | 166 -
.../thirdeye/detection/InputDataFetcher.java | 35 -
.../detection/LegacyAnomalyFunctionTranslator.java | 94 -
.../com/linkedin/thirdeye/detection/Pattern.java | 29 -
.../detection/StaticDetectionPipeline.java | 91 -
.../thirdeye/detection/alert/AlertUtils.java | 138 -
.../detection/alert/DetectionAlertFilter.java | 63 -
.../alert/DetectionAlertFilterRecipients.java | 105 -
.../alert/DetectionAlertFilterResult.java | 94 -
.../detection/alert/DetectionAlertJob.java | 86 -
.../detection/alert/DetectionAlertScheduler.java | 185 -
.../detection/alert/DetectionAlertTaskFactory.java | 126 -
.../detection/alert/DetectionAlertTaskInfo.java | 46 -
.../detection/alert/DetectionAlertTaskRunner.java | 142 -
.../alert/StatefulDetectionAlertFilter.java | 97 -
.../filter/DimensionDetectionAlertFilter.java | 120 -
.../detection/alert/filter/LegacyAlertFilter.java | 141 -
.../ToAllRecipientsDetectionAlertFilter.java | 87 -
.../alert/scheme/DetectionAlertScheme.java | 37 -
.../alert/scheme/DetectionEmailAlerter.java | 191 -
.../alert/suppress/DetectionAlertSuppressor.java | 41 -
.../DetectionAlertTimeWindowSuppressor.java | 148 -
.../detection/algorithm/AlgorithmUtils.java | 381 --
.../detection/algorithm/BaselineAlgorithm.java | 139 -
.../algorithm/BaselineRuleFilterWrapper.java | 117 -
.../detection/algorithm/DimensionWrapper.java | 242 --
.../algorithm/LegacyAlertFilterWrapper.java | 130 -
.../algorithm/LegacyAnomalyFunctionAlgorithm.java | 191 -
.../algorithm/LegacyDimensionWrapper.java | 126 -
.../detection/algorithm/LegacyMergeWrapper.java | 403 --
.../thirdeye/detection/algorithm/MergeWrapper.java | 242 --
.../detection/algorithm/MovingWindowAlgorithm.java | 759 ----
.../algorithm/RuleBasedFilterWrapper.java | 90 -
.../detection/algorithm/ThresholdAlgorithm.java | 93 -
.../algorithm/ThresholdRuleFilterWrapper.java | 73 -
.../algorithm/stage/AnomalyDetectionStage.java | 38 -
.../stage/AnomalyDetectionStageWrapper.java | 216 -
.../algorithm/stage/AnomalyFilterStage.java | 38 -
.../algorithm/stage/AnomalyFilterStageWrapper.java | 104 -
.../algorithm/stage/BaseDetectionStage.java | 36 -
.../stage/BaselineRuleDetectionStage.java | 144 -
.../algorithm/stage/BaselineRuleFilterStage.java | 119 -
.../detection/algorithm/stage/GrouperStage.java | 38 -
.../algorithm/stage/GrouperStageWrapper.java | 88 -
.../stage/StaticAnomalyDetectionStage.java | 169 -
.../algorithm/stage/StaticAnomalyFilterStage.java | 62 -
.../stage/ThresholdRuleDetectionStage.java | 107 -
.../algorithm/stage/ThresholdRuleFilterStage.java | 90 -
.../thirdeye/detection/annotation/AlertScheme.java | 35 -
.../detection/annotation/AlertSuppressor.java | 35 -
.../thirdeye/detection/annotation/Components.java | 52 -
.../annotation/DetectionConfigurationResource.java | 41 -
.../detection/annotation/DetectionTag.java | 27 -
.../thirdeye/detection/annotation/Param.java | 44 -
.../detection/annotation/PresentationOption.java | 39 -
.../thirdeye/detection/annotation/Tune.java | 39 -
.../thirdeye/detection/annotation/Yaml.java | 39 -
.../registry/DetectionAlertRegistry.java | 110 -
.../annotation/registry/DetectionRegistry.java | 151 -
.../AbsoluteChangeRuleAnomalyFilter.java | 100 -
.../components/AbsoluteChangeRuleDetector.java | 115 -
.../PercentageChangeRuleAnomalyFilter.java | 99 -
.../components/PercentageChangeRuleDetector.java | 124 -
.../detection/components/RuleBaselineProvider.java | 71 -
.../SitewideImpactRuleAnomalyFilter.java | 148 -
.../components/ThresholdRuleAnomalyFilter.java | 80 -
.../components/ThresholdRuleDetector.java | 95 -
.../detection/finetune/F1ScoreFunction.java | 72 -
.../finetune/GridSearchTuningAlgorithm.java | 189 -
.../thirdeye/detection/finetune/ScoreFunction.java | 29 -
.../finetune/TimeBucketF1ScoreFunction.java | 106 -
.../detection/finetune/TuningAlgorithm.java | 44 -
.../spec/AbsoluteChangeRuleAnomalyFilterSpec.java | 59 -
.../spec/AbsoluteChangeRuleDetectorSpec.java | 59 -
.../thirdeye/detection/spec/AbstractSpec.java | 36 -
.../PercentageChangeRuleAnomalyFilterSpec.java | 59 -
.../spec/PercentageChangeRuleDetectorSpec.java | 59 -
.../detection/spec/RuleBaselineProviderSpec.java | 53 -
.../spec/SitewideImpactRuleAnomalyFilterSpec.java | 91 -
.../detection/spec/ThresholdRuleDetectorSpec.java | 45 -
.../detection/spec/ThresholdRuleFilterSpec.java | 45 -
.../detection/spi/components/AnomalyDetector.java | 37 -
.../detection/spi/components/AnomalyFilter.java | 32 -
.../detection/spi/components/BaseComponent.java | 28 -
.../detection/spi/components/BaselineProvider.java | 47 -
.../thirdeye/detection/spi/components/Grouper.java | 37 -
.../thirdeye/detection/spi/components/Tunable.java | 39 -
.../thirdeye/detection/spi/model/AnomalySlice.java | 88 -
.../thirdeye/detection/spi/model/EventSlice.java | 90 -
.../thirdeye/detection/spi/model/InputData.java | 121 -
.../detection/spi/model/InputDataSpec.java | 180 -
.../thirdeye/detection/spi/model/TimeSeries.java | 67 -
.../detection/validators/ConfigValidator.java | 67 -
.../validators/DetectionAlertConfigValidator.java | 128 -
.../detection/wrapper/AnomalyDetectorWrapper.java | 246 --
.../detection/wrapper/AnomalyFilterWrapper.java | 97 -
.../wrapper/BaselineFillingMergeWrapper.java | 187 -
.../wrapper/ChildKeepingMergeWrapper.java | 118 -
.../yaml/CompositePipelineConfigTranslator.java | 474 ---
.../yaml/YamlDetectionAlertConfigTranslator.java | 223 --
.../yaml/YamlDetectionConfigTranslator.java | 102 -
.../yaml/YamlDetectionTranslatorLoader.java | 42 -
.../thirdeye/detection/yaml/YamlResource.java | 503 ---
.../detection/yaml/YamlTranslationResult.java | 87 -
.../detector/email/AnomalyGraphGenerator.java | 338 --
.../detector/email/filter/AlertFilter.java | 33 -
.../detector/email/filter/AlertFilterFactory.java | 105 -
.../email/filter/AlphaBetaAlertFilter.java | 110 -
.../filter/AverageChangeThresholdAlertFilter.java | 57 -
.../detector/email/filter/BaseAlertFilter.java | 123 -
.../detector/email/filter/DummyAlertFilter.java | 49 -
.../email/filter/PrecisionRecallEvaluator.java | 283 --
.../detector/email/filter/UserReportUtils.java | 64 -
.../email/filter/WeightThresholdAlertFilter.java | 89 -
.../detector/function/AnomalyFunction.java | 177 -
.../detector/function/AnomalyFunctionFactory.java | 111 -
.../detector/function/BaseAnomalyFunction.java | 197 -
.../detector/metric/transfer/MetricTransfer.java | 163 -
.../detector/metric/transfer/ScalingFactor.java | 65 -
.../com/linkedin/thirdeye/rootcause/Entity.java | 105 -
.../linkedin/thirdeye/rootcause/MaxScoreSet.java | 185 -
.../com/linkedin/thirdeye/rootcause/Pipeline.java | 59 -
.../thirdeye/rootcause/PipelineCallable.java | 81 -
.../thirdeye/rootcause/PipelineContext.java | 66 -
.../thirdeye/rootcause/PipelineResult.java | 45 -
.../linkedin/thirdeye/rootcause/RCAFramework.java | 180 -
.../rootcause/RCAFrameworkExecutionResult.java | 75 -
.../thirdeye/rootcause/StaticPipeline.java | 88 -
.../rootcause/callgraph/CallGraphEntity.java | 76 -
.../callgraph/CallGraphEntityFormatter.java | 56 -
.../rootcause/callgraph/CallGraphPipeline.java | 418 --
.../rootcause/impl/AnomalyContextPipeline.java | 147 -
.../rootcause/impl/AnomalyEventEntity.java | 62 -
.../rootcause/impl/AnomalyEventsPipeline.java | 243 --
.../thirdeye/rootcause/impl/DatasetEntity.java | 77 -
.../rootcause/impl/DimensionAnalysisPipeline.java | 318 --
.../thirdeye/rootcause/impl/DimensionEntity.java | 129 -
.../thirdeye/rootcause/impl/DimensionsEntity.java | 81 -
.../thirdeye/rootcause/impl/EmptyPipeline.java | 61 -
.../rootcause/impl/EntityMappingPipeline.java | 324 --
.../thirdeye/rootcause/impl/EntityType.java | 92 -
.../thirdeye/rootcause/impl/EventEntity.java | 69 -
.../thirdeye/rootcause/impl/HyperlinkEntity.java | 56 -
.../rootcause/impl/LinearAggregationPipeline.java | 142 -
.../rootcause/impl/MaxAggregationPipeline.java | 76 -
.../rootcause/impl/MetricAnalysisPipeline.java | 371 --
.../rootcause/impl/MetricAnalysisPipeline2.java | 429 --
.../rootcause/impl/MetricBreakdownPipeline.java | 292 --
.../impl/MetricComponentAnalysisPipeline.java | 343 --
.../impl/MetricCorrelationRankingPipeline.java | 408 --
.../rootcause/impl/MetricDatasetPipeline.java | 183 -
.../rootcause/impl/MetricDimensionPipeline.java | 72 -
.../thirdeye/rootcause/impl/MetricEntity.java | 105 -
.../rootcause/impl/MetricMappingPipeline.java | 292 --
.../rootcause/impl/NormalizationPipeline.java | 61 -
.../thirdeye/rootcause/impl/NullPipeline.java | 61 -
.../rootcause/impl/PipelineConfiguration.java | 62 -
.../thirdeye/rootcause/impl/RCAConfiguration.java | 41 -
.../rootcause/impl/RCAFrameworkLoader.java | 115 -
.../rootcause/impl/RCAFrameworkRunner.java | 334 --
.../thirdeye/rootcause/impl/ServiceEntity.java | 72 -
.../rootcause/impl/ThirdEyeEventEntity.java | 63 -
.../rootcause/impl/ThirdEyeEventsPipeline.java | 251 --
.../thirdeye/rootcause/impl/TimeRangeEntity.java | 125 -
.../thirdeye/rootcause/impl/TopKPipeline.java | 85 -
.../thirdeye/rootcause/timeseries/Baseline.java | 62 -
.../rootcause/timeseries/BaselineAggregate.java | 553 ---
.../timeseries/BaselineAggregateType.java | 50 -
.../rootcause/timeseries/BaselineNone.java | 45 -
.../rootcause/timeseries/BaselineOffset.java | 86 -
.../thirdeye/rootcause/util/EntityUtils.java | 554 ---
.../thirdeye/rootcause/util/FilterPredicate.java | 54 -
.../thirdeye/rootcause/util/ParsedUrn.java | 110 -
.../thirdeye/rootcause/util/ScoreUtils.java | 244 --
.../com/linkedin/thirdeye/tracking/RequestLog.java | 233 --
.../thirdeye/tracking/RequestLogEntry.java | 78 -
.../thirdeye/tracking/RequestStatistics.java | 212 -
.../tracking/RequestStatisticsFormatter.java | 133 -
.../thirdeye/tracking/RequestStatisticsLogger.java | 65 -
.../com/linkedin/thirdeye/util/AnomalyOffset.java | 45 -
.../thirdeye/util/CustomDateDeserializer.java | 40 -
.../thirdeye/util/CustomDateSerializer.java | 39 -
.../thirdeye/util/CustomListDateDeserializer.java | 47 -
.../thirdeye/util/CustomListDateSerializer.java | 41 -
.../com/linkedin/thirdeye/util/IntervalUtils.java | 87 -
.../linkedin/thirdeye/util/JodaDateTimeUtils.java | 51 -
.../linkedin/thirdeye/util/JsonResponseUtil.java | 65 -
.../com/linkedin/thirdeye/util/NumberUtils.java | 178 -
.../thirdeye/util/SeverityComputationUtil.java | 118 -
.../com/linkedin/thirdeye/util/ThirdEyeUtils.java | 523 ---
.../linkedin/thirdeye/util/TimeSeriesUtils.java | 96 -
.../thirdeye/alert/commons/AnomalyFeedConfig.java | 96 +
.../thirdeye/alert/commons/AnomalyFeedFactory.java | 34 +
.../alert/commons/AnomalyFetcherConfig.java | 59 +
.../alert/commons/AnomalyFetcherFactory.java | 34 +
.../alert/commons/AnomalyNotifiedStatus.java | 48 +
.../thirdeye/alert/commons/AnomalySource.java | 50 +
.../commons/EmailContentFormatterFactory.java | 34 +
.../pinot/thirdeye/alert/commons/EmailEntity.java | 74 +
.../alert/content/BaseEmailContentFormatter.java | 885 ++++
.../alert/content/EmailContentFormatter.java | 62 +
.../EmailContentFormatterConfiguration.java | 128 +
.../content/EmailContentFormatterContext.java | 64 +
...HierarchicalAnomaliesEmailContentFormatter.java | 202 +
.../MultipleAnomaliesEmailContentFormatter.java | 195 +
...nboardingNotificationEmailContentFormatter.java | 106 +
.../pinot/thirdeye/alert/feed/AnomalyFeed.java | 50 +
.../thirdeye/alert/feed/UnionAnomalyFeed.java | 136 +
.../thirdeye/alert/fetcher/AnomalyFetcher.java | 43 +
.../thirdeye/alert/fetcher/BaseAnomalyFetcher.java | 69 +
.../alert/fetcher/ContinuumAnomalyFetcher.java | 110 +
.../alert/fetcher/UnnotifiedAnomalyFetcher.java | 107 +
.../anomaly/HolidayEventsLoaderConfiguration.java | 94 +
.../pinot/thirdeye/anomaly/SmtpConfiguration.java | 109 +
.../anomaly/ThirdEyeAnomalyApplication.java | 253 ++
.../anomaly/ThirdEyeAnomalyConfiguration.java | 214 +
.../thirdeye/anomaly/alert/AlertJobContext.java | 48 +
.../thirdeye/anomaly/alert/AlertJobResource.java | 89 +
.../thirdeye/anomaly/alert/AlertTaskInfo.java | 113 +
.../anomaly/alert/grouping/AlertGroupKey.java | 109 +
.../anomaly/alert/grouping/AlertGrouper.java | 43 +
.../alert/grouping/AlertGrouperFactory.java | 69 +
.../anomaly/alert/grouping/BaseAlertGrouper.java | 33 +
.../alert/grouping/DimensionalAlertGrouper.java | 142 +
.../anomaly/alert/grouping/DummyAlertGrouper.java | 43 +
.../HorizontalDimensionalAlertGrouper.java | 100 +
.../alert/grouping/SimpleGroupedAnomalyMerger.java | 65 +
.../AlertGroupAuxiliaryInfoProvider.java | 49 +
.../AlertGroupRecipientProviderFactory.java | 165 +
.../AuxiliaryAlertGroupInfo.java | 96 +
.../BaseAlertGroupAuxiliaryInfoProvider.java | 35 +
...sionalAlertGroupAuxiliaryRecipientProvider.java | 77 +
.../DummyAlertGroupAuxiliaryInfoProvider.java | 31 +
.../alert/grouping/filter/AlertGroupFilter.java | 46 +
.../grouping/filter/AlertGroupFilterFactory.java | 67 +
.../grouping/filter/BaseAlertGroupFilter.java | 33 +
.../grouping/filter/DummyAlertGroupFilter.java | 34 +
.../filter/SizeSeverityAlertGroupFilter.java | 100 +
.../alert/template/pojo/MetricDimensionReport.java | 134 +
.../anomaly/alert/util/AlertFilterHelper.java | 79 +
.../anomaly/alert/util/AnomalyReportGenerator.java | 599 +++
.../anomaly/alert/util/DataReportHelper.java | 276 ++
.../thirdeye/anomaly/alert/util/EmailHelper.java | 285 ++
.../anomaly/alert/util/EmailScreenshotHelper.java | 120 +
.../anomaly/alert/v2/AlertJobRunnerV2.java | 157 +
.../anomaly/alert/v2/AlertJobSchedulerV2.java | 249 ++
.../anomaly/alert/v2/AlertTaskRunnerV2.java | 637 +++
.../classification/ClassificationJobConfig.java | 47 +
.../classification/ClassificationJobContext.java | 70 +
.../classification/ClassificationJobResource.java | 23 +
.../classification/ClassificationJobRunner.java | 126 +
.../classification/ClassificationJobScheduler.java | 186 +
.../classification/ClassificationTaskInfo.java | 73 +
.../classification/ClassificationTaskRunner.java | 438 ++
.../classifier/AnomalyClassifier.java | 45 +
.../classifier/AnomalyClassifierFactory.java | 152 +
.../classifier/BaseAnomalyClassifier.java | 32 +
.../classifier/DummyAnomalyClassifier.java | 36 +
.../detection/AnomalyDetectionInputContext.java | 71 +
.../AnomalyDetectionInputContextBuilder.java | 704 ++++
.../detection/AnomalyDetectionOutputContext.java | 37 +
.../anomaly/detection/DetectionJobContext.java | 80 +
.../anomaly/detection/DetectionJobRunner.java | 182 +
.../anomaly/detection/DetectionJobScheduler.java | 613 +++
.../detection/DetectionJobSchedulerUtils.java | 232 ++
.../anomaly/detection/DetectionTaskInfo.java | 138 +
.../anomaly/detection/DetectionTaskRunner.java | 460 +++
.../anomaly/detection/lib/AutotuneMethodType.java | 24 +
.../detection/lib/FunctionReplayRunnable.java | 295 ++
.../thirdeye/anomaly/events/EventDataProvider.java | 28 +
.../pinot/thirdeye/anomaly/events/EventFilter.java | 182 +
.../pinot/thirdeye/anomaly/events/EventType.java | 24 +
.../events/HistoricalAnomalyEventProvider.java | 87 +
.../anomaly/events/HolidayEventProvider.java | 53 +
.../anomaly/events/HolidayEventResource.java | 60 +
.../anomaly/events/HolidayEventsLoader.java | 424 ++
.../pinot/thirdeye/anomaly/job/JobConstants.java | 33 +
.../pinot/thirdeye/anomaly/job/JobContext.java | 97 +
.../pinot/thirdeye/anomaly/job/JobRunner.java | 31 +
.../pinot/thirdeye/anomaly/job/JobScheduler.java | 39 +
.../thirdeye/anomaly/merge/AnomalyMergeConfig.java | 92 +
.../anomaly/merge/AnomalyMergeStrategy.java | 25 +
.../anomaly/merge/AnomalyTimeBasedSummarizer.java | 171 +
.../anomaly/merge/TimeBasedAnomalyMerger.java | 314 ++
.../anomaly/monitor/MonitorConfiguration.java | 71 +
.../thirdeye/anomaly/monitor/MonitorConstants.java | 38 +
.../anomaly/monitor/MonitorJobContext.java | 38 +
.../thirdeye/anomaly/monitor/MonitorJobRunner.java | 128 +
.../anomaly/monitor/MonitorJobScheduler.java | 72 +
.../thirdeye/anomaly/monitor/MonitorTaskInfo.java | 111 +
.../anomaly/monitor/MonitorTaskRunner.java | 220 +
.../anomaly/onboard/DetectionOnboardResource.java | 184 +
.../thirdeye/anomaly/onboard/ReplayTaskInfo.java | 45 +
.../thirdeye/anomaly/onboard/ReplayTaskRunner.java | 146 +
.../onboard/framework/BaseDetectionOnboardJob.java | 43 +
.../framework/BaseDetectionOnboardTask.java | 50 +
.../framework/DetectionOnBoardJobRunner.java | 184 +
.../DetectionOnboardExecutionContext.java | 54 +
.../onboard/framework/DetectionOnboardJob.java | 50 +
.../framework/DetectionOnboardJobContext.java | 113 +
.../framework/DetectionOnboardJobStatus.java | 93 +
.../onboard/framework/DetectionOnboardTask.java | 50 +
.../framework/DetectionOnboardTaskContext.java | 51 +
.../framework/DetectionOnboardTaskRunner.java | 55 +
.../framework/DetectionOnboardTaskStatus.java | 69 +
.../tasks/AlertFilterAutoTuneOnboardingTask.java | 127 +
.../tasks/DataPreparationOnboardingTask.java | 79 +
.../onboard/tasks/DefaultDetectionOnboardJob.java | 322 ++
.../tasks/FunctionCreationOnboardingTask.java | 365 ++
.../tasks/FunctionReplayOnboardingTask.java | 129 +
.../onboard/tasks/NotificationOnboardingTask.java | 164 +
.../onboard/utils/FunctionCreationUtils.java | 52 +
.../anomaly/onboard/utils/PropertyCheckUtils.java | 50 +
.../anomaly/override/OverrideConfigHelper.java | 228 ++
.../pinot/thirdeye/anomaly/task/TaskConstants.java | 45 +
.../pinot/thirdeye/anomaly/task/TaskContext.java | 64 +
.../pinot/thirdeye/anomaly/task/TaskDriver.java | 236 ++
.../anomaly/task/TaskDriverConfiguration.java | 68 +
.../pinot/thirdeye/anomaly/task/TaskGenerator.java | 127 +
.../pinot/thirdeye/anomaly/task/TaskInfo.java | 27 +
.../thirdeye/anomaly/task/TaskInfoFactory.java | 87 +
.../pinot/thirdeye/anomaly/task/TaskResult.java | 24 +
.../pinot/thirdeye/anomaly/task/TaskRunner.java | 32 +
.../thirdeye/anomaly/task/TaskRunnerFactory.java | 71 +
.../anomaly/utils/AbstractResourceHttpUtils.java | 84 +
.../anomaly/utils/AlertResourceHttpUtils.java | 60 +
.../pinot/thirdeye/anomaly/utils/AnomalyUtils.java | 164 +
.../anomaly/utils/DetectionResourceHttpUtils.java | 144 +
.../pinot/thirdeye/anomaly/utils/EmailUtils.java | 80 +
.../anomaly/utils/OnboardResourceHttpUtils.java | 54 +
.../anomaly/utils/ThirdeyeMetricsUtil.java | 156 +
.../anomaly/views/AnomalyTimelinesView.java | 113 +
.../views/CondensedAnomalyTimelinesView.java | 268 ++
.../anomalydetection/AnomalyDetectionUtils.java | 76 +
.../alertFilterAutotune/AlertFilterAutoTune.java | 35 +
.../AlertFilterAutotuneFactory.java | 95 +
.../BaseAlertFilterAutoTune.java | 78 +
.../DummyAlertFilterAutoTune.java | 35 +
.../context/AnomalyDetectionContext.java | 186 +
.../anomalydetection/context/AnomalyFeedback.java | 48 +
.../anomalydetection/context/AnomalyResult.java | 133 +
.../anomalydetection/context/MetricTimeSeries.java | 63 +
.../anomalydetection/context/RawAnomalyResult.java | 157 +
.../anomalydetection/context/TimeSeries.java | 167 +
.../anomalydetection/context/TimeSeriesKey.java | 61 +
.../datafilter/AverageThresholdDataFilter.java | 241 ++
.../datafilter/BaseDataFilter.java | 32 +
.../anomalydetection/datafilter/DataFilter.java | 57 +
.../datafilter/DataFilterFactory.java | 67 +
.../datafilter/DummyDataFilter.java | 41 +
.../AbstractModularizedAnomalyFunction.java | 369 ++
.../function/AnomalyDetectionFunction.java | 93 +
.../function/BackwardAnomalyFunctionUtils.java | 146 +
.../function/MinMaxThresholdFunction.java | 104 +
.../ModularizedAnomalyFunctionModelProvider.java | 66 +
.../function/WeekOverWeekRuleFunction.java | 176 +
.../model/data/AbstractDataModel.java | 36 +
.../anomalydetection/model/data/DataModel.java | 57 +
.../anomalydetection/model/data/NoopDataModel.java | 40 +
.../model/data/SeasonalDataModel.java | 82 +
.../model/detection/AbstractDetectionModel.java | 36 +
.../model/detection/DetectionModel.java | 49 +
.../detection/MinMaxThresholdDetectionModel.java | 99 +
.../model/detection/NoopDetectionModel.java | 32 +
.../detection/SimpleThresholdDetectionModel.java | 124 +
.../model/merge/AbstractMergeModel.java | 36 +
.../anomalydetection/model/merge/MergeModel.java | 48 +
.../model/merge/MinMaxThresholdMergeModel.java | 93 +
.../model/merge/NoPredictionMergeModel.java | 27 +
.../model/merge/NoopMergeModel.java | 31 +
.../model/merge/SimplePercentageMergeModel.java | 106 +
.../model/prediction/AbstractPredictionModel.java | 36 +
.../ExpectedTimeSeriesPredictionModel.java | 31 +
.../model/prediction/NoopPredictionModel.java | 31 +
.../model/prediction/PredictionModel.java | 47 +
.../prediction/SeasonalAveragePredictionModel.java | 109 +
.../transform/AbstractTransformationFunction.java | 36 +
.../model/transform/AnomalyRemovalFunction.java | 206 +
.../transform/MovingAverageSmoothingFunction.java | 98 +
.../TotalCountThresholdRemovalFunction.java | 70 +
.../model/transform/TransformationFunction.java | 54 +
.../model/transform/ZeroRemovalFunction.java | 53 +
.../AnomalyPercentagePerformanceEvaluation.java | 57 +
.../BasePerformanceEvaluate.java | 61 +
.../F1ScoreByTimePerformanceEvaluation.java | 46 +
.../performanceEvaluation/PerformanceEvaluate.java | 24 +
.../PerformanceEvaluateHelper.java | 69 +
.../PerformanceEvaluationMethod.java | 24 +
.../PrecisionByTimePerformanceEvaluation.java | 85 +
.../RecallByTimePreformanceEvaluation.java | 91 +
.../org/apache/pinot/thirdeye/api/Constants.java | 34 +
.../apache/pinot/thirdeye/api/DimensionKey.java | 206 +
.../apache/pinot/thirdeye/api/DimensionMap.java | 330 ++
.../apache/pinot/thirdeye/api/DimensionSpec.java | 75 +
.../apache/pinot/thirdeye/api/DimensionType.java | 25 +
.../apache/pinot/thirdeye/api/MetricSchema.java | 122 +
.../org/apache/pinot/thirdeye/api/MetricSpec.java | 62 +
.../pinot/thirdeye/api/MetricTimeSeries.java | 386 ++
.../org/apache/pinot/thirdeye/api/MetricType.java | 89 +
.../apache/pinot/thirdeye/api/TimeGranularity.java | 205 +
.../org/apache/pinot/thirdeye/api/TimeRange.java | 92 +
.../org/apache/pinot/thirdeye/api/TimeSpec.java | 58 +
.../apache/pinot/thirdeye/auth/Credentials.java | 61 +
.../pinot/thirdeye/auth/ThirdEyeAuthFilter.java | 134 +
.../auth/ThirdEyeAuthenticatorDisabled.java | 48 +
.../thirdeye/auth/ThirdEyeAuthenticatorLdap.java | 214 +
.../pinot/thirdeye/auth/ThirdEyePrincipal.java | 56 +
.../pinot/thirdeye/auto/onboard/AutoOnboard.java | 48 +
.../auto/onboard/AutoOnboardConfiguration.java | 39 +
.../onboard/AutoOnboardPinotMetadataSource.java | 435 ++
.../auto/onboard/AutoOnboardPinotMetricsUtils.java | 229 ++
.../thirdeye/auto/onboard/AutoOnboardService.java | 80 +
.../thirdeye/auto/onboard/AutoOnboardUtility.java | 88 +
.../thirdeye/auto/onboard/ConfigGenerator.java | 118 +
.../pinot/thirdeye/client/diffsummary/Cube.java | 570 +++
.../client/diffsummary/DimNameValueCostEntry.java | 108 +
.../client/diffsummary/DimensionValues.java | 110 +
.../thirdeye/client/diffsummary/Dimensions.java | 121 +
.../thirdeye/client/diffsummary/HierarchyNode.java | 296 ++
.../diffsummary/MultiDimensionalSummary.java | 115 +
.../MultiDimensionalSummaryCLITool.java | 257 ++
.../client/diffsummary/OLAPDataBaseClient.java | 50 +
.../diffsummary/PinotThirdEyeSummaryClient.java | 278 ++
.../pinot/thirdeye/client/diffsummary/Row.java | 105 +
.../costfunctions/BalancedCostFunction.java | 138 +
.../costfunctions/ChangeRatioCostFunction.java | 58 +
.../ContributionToOverallChangeCostFunction.java | 61 +
.../diffsummary/costfunctions/CostFunction.java | 25 +
.../apache/pinot/thirdeye/common/BaseFactory.java | 32 +
.../thirdeye/common/BaseThirdEyeApplication.java | 84 +
.../thirdeye/common/ThirdEyeConfiguration.java | 181 +
.../thirdeye/common/ThirdEyeSwaggerBundle.java | 35 +
.../checker/DataCompletenessAlgorithm.java | 94 +
.../checker/DataCompletenessAlgorithmFactory.java | 39 +
.../checker/DataCompletenessConstants.java | 41 +
.../checker/DataCompletenessJobContext.java | 55 +
.../checker/DataCompletenessJobRunner.java | 170 +
.../checker/DataCompletenessScheduler.java | 53 +
.../checker/DataCompletenessTaskInfo.java | 101 +
.../checker/DataCompletenessTaskRunner.java | 348 ++
.../checker/DataCompletenessUtils.java | 273 ++
.../checker/PercentCompletenessFunctionInput.java | 88 +
.../checker/Wo4WAvgDataCompletenessAlgorithm.java | 164 +
.../pinot/thirdeye/config/ConfigNamespace.java | 122 +
.../thirdeye/constant/AnomalyFeedbackType.java | 50 +
.../thirdeye/constant/AnomalyResultSource.java | 26 +
.../pinot/thirdeye/constant/MetricAggFunction.java | 30 +
.../thirdeye/dashboard/DetectorHttpUtils.java | 78 +
.../thirdeye/dashboard/HandlebarsHelperBundle.java | 87 +
.../thirdeye/dashboard/HandlebarsViewRenderer.java | 109 +
.../pinot/thirdeye/dashboard/HelperBundle.java | 31 +
.../thirdeye/dashboard/RootCauseConfiguration.java | 55 +
.../dashboard/ThirdEyeDashboardApplication.java | 333 ++
.../dashboard/ThirdEyeDashboardConfiguration.java | 57 +
.../org/apache/pinot/thirdeye/dashboard/Utils.java | 246 ++
.../apache/pinot/thirdeye/dashboard/ViewType.java | 27 +
.../dashboard/configs/AuthConfiguration.java | 117 +
.../dashboard/configs/ResourceConfiguration.java | 37 +
.../dashboard/resources/AdminResource.java | 46 +
.../dashboard/resources/AnomalyResource.java | 946 +++++
.../dashboard/resources/AutoOnboardResource.java | 74 +
.../dashboard/resources/CacheResource.java | 156 +
.../resources/CustomizedEventResource.java | 82 +
.../dashboard/resources/DashboardResource.java | 245 ++
.../dashboard/resources/DatasetConfigResource.java | 118 +
.../dashboard/resources/DetectionJobResource.java | 1308 ++++++
.../dashboard/resources/EmailResource.java | 598 +++
.../dashboard/resources/EntityManagerResource.java | 266 ++
.../dashboard/resources/EntityMappingResource.java | 246 ++
.../dashboard/resources/MetricConfigResource.java | 297 ++
.../resources/OnboardDatasetMetricResource.java | 118 +
.../dashboard/resources/OnboardResource.java | 680 ++++
.../dashboard/resources/SummaryResource.java | 182 +
.../dashboard/resources/ThirdEyeResource.java | 42 +
.../dashboard/resources/v2/AnomaliesResource.java | 1322 ++++++
.../dashboard/resources/v2/AuthResource.java | 155 +
.../resources/v2/BaselineParsingUtils.java | 194 +
.../dashboard/resources/v2/ConfigResource.java | 113 +
.../dashboard/resources/v2/DataResource.java | 578 +++
.../resources/v2/DetectionAlertResource.java | 105 +
.../dashboard/resources/v2/ResourceUtils.java | 372 ++
.../resources/v2/RootCauseEntityFormatter.java | 44 +
.../v2/RootCauseEventEntityFormatter.java | 63 +
.../resources/v2/RootCauseMetricResource.java | 673 ++++
.../dashboard/resources/v2/RootCauseResource.java | 213 +
.../resources/v2/RootCauseSessionResource.java | 260 ++
.../dashboard/resources/v2/TimeSeriesResource.java | 435 ++
.../resources/v2/UserDashboardResource.java | 424 ++
.../resources/v2/pojo/AnomaliesSummary.java | 75 +
.../resources/v2/pojo/AnomaliesWrapper.java | 74 +
.../v2/pojo/AnomalyClassificationType.java | 28 +
.../resources/v2/pojo/AnomalyDataCompare.java | 97 +
.../resources/v2/pojo/AnomalyDetails.java | 294 ++
.../resources/v2/pojo/AnomalySummary.java | 197 +
.../dashboard/resources/v2/pojo/MetricSummary.java | 108 +
.../resources/v2/pojo/RootCauseEntity.java | 114 +
.../resources/v2/pojo/RootCauseEventEntity.java | 64 +
.../dashboard/resources/v2/pojo/SearchFilters.java | 302 ++
.../v2/pojo/TimeSeriesCompareMetricView.java | 127 +
.../resources/v2/pojo/ValuesContainer.java | 79 +
.../dashboard/resources/v2/pojo/WowSummary.java | 37 +
.../v2/rootcause/AnomalyEventFormatter.java | 216 +
.../v2/rootcause/DefaultEntityFormatter.java | 43 +
.../v2/rootcause/DefaultEventEntityFormatter.java | 39 +
.../v2/rootcause/DimensionEntityFormatter.java | 42 +
.../resources/v2/rootcause/FormatterLoader.java | 29 +
.../resources/v2/rootcause/HyperlinkFormatter.java | 41 +
.../v2/rootcause/MetricEntityFormatter.java | 168 +
.../v2/rootcause/ServiceEntityFormatter.java | 41 +
.../v2/rootcause/ThirdEyeEventFormatter.java | 69 +
.../dashboard/views/CompareViewRequest.java | 128 +
.../thirdeye/dashboard/views/DashboardView.java | 31 +
.../thirdeye/dashboard/views/DefaultView.java | 30 +
.../thirdeye/dashboard/views/GenericResponse.java | 164 +
.../dashboard/views/ThirdEyeAdminView.java | 30 +
.../thirdeye/dashboard/views/ThirdEyeView.java | 30 +
.../pinot/thirdeye/dashboard/views/TimeBucket.java | 115 +
.../thirdeye/dashboard/views/ViewHandler.java | 26 +
.../thirdeye/dashboard/views/ViewRequest.java | 33 +
.../thirdeye/dashboard/views/ViewResponse.java | 24 +
.../views/contributor/ContributionCell.java | 183 +
.../views/contributor/ContributionViewTable.java | 48 +
.../contributor/ContributionViewTableBuilder.java | 142 +
.../views/contributor/ContributorViewHandler.java | 331 ++
.../views/contributor/ContributorViewRequest.java | 27 +
.../views/contributor/ContributorViewResponse.java | 109 +
.../views/diffsummary/BaseResponseRow.java | 28 +
.../dashboard/views/diffsummary/DPArray.java | 101 +
.../dashboard/views/diffsummary/Summary.java | 443 ++
.../diffsummary/SummaryGainerLoserResponseRow.java | 38 +
.../views/diffsummary/SummaryResponse.java | 296 ++
.../views/diffsummary/SummaryResponseRow.java | 50 +
.../views/diffsummary/SummaryResponseTree.java | 243 ++
.../thirdeye/dashboard/views/heatmap/HeatMap.java | 155 +
.../dashboard/views/heatmap/HeatMapCell.java | 146 +
.../views/heatmap/HeatMapViewHandler.java | 335 ++
.../views/heatmap/HeatMapViewRequest.java | 26 +
.../views/heatmap/HeatMapViewResponse.java | 253 ++
.../views/tabular/TabularViewHandler.java | 268 ++
.../views/tabular/TabularViewRequest.java | 28 +
.../views/tabular/TabularViewResponse.java | 71 +
.../pinot/thirdeye/dataframe/BooleanSeries.java | 961 +++++
.../apache/pinot/thirdeye/dataframe/DataFrame.java | 2658 ++++++++++++
.../pinot/thirdeye/dataframe/DoubleSeries.java | 1207 ++++++
.../apache/pinot/thirdeye/dataframe/Grouping.java | 1193 ++++++
.../pinot/thirdeye/dataframe/LongSeries.java | 917 +++++
.../pinot/thirdeye/dataframe/ObjectSeries.java | 855 ++++
.../thirdeye/dataframe/PrimitiveMultimap.java | 245 ++
.../apache/pinot/thirdeye/dataframe/Series.java | 1504 +++++++
.../pinot/thirdeye/dataframe/StringSeries.java | 763 ++++
.../pinot/thirdeye/dataframe/TypedSeries.java | 98 +
.../dataframe/util/DataFrameSerializer.java | 87 +
.../thirdeye/dataframe/util/DataFrameUtils.java | 624 +++
.../pinot/thirdeye/dataframe/util/MetricSlice.java | 122 +
.../thirdeye/dataframe/util/RequestContainer.java | 46 +
.../dataframe/util/TimeSeriesRequestContainer.java | 53 +
.../pinot/thirdeye/datalayer/ScriptRunner.java | 179 +
.../thirdeye/datalayer/bao/AbstractManager.java | 60 +
.../thirdeye/datalayer/bao/AlertConfigManager.java | 31 +
.../datalayer/bao/AlertSnapshotManager.java | 26 +
.../datalayer/bao/AnomalyFunctionManager.java | 46 +
.../thirdeye/datalayer/bao/ApplicationManager.java | 28 +
.../datalayer/bao/AutotuneConfigManager.java | 36 +
.../datalayer/bao/ClassificationConfigManager.java | 29 +
.../thirdeye/datalayer/bao/ConfigManager.java | 30 +
.../bao/DataCompletenessConfigManager.java | 39 +
.../datalayer/bao/DatasetConfigManager.java | 32 +
.../datalayer/bao/DetectionAlertConfigManager.java | 26 +
.../datalayer/bao/DetectionConfigManager.java | 27 +
.../datalayer/bao/DetectionStatusManager.java | 34 +
.../bao/EntityToEntityMappingManager.java | 36 +
.../pinot/thirdeye/datalayer/bao/EventManager.java | 29 +
.../bao/GroupedAnomalyResultsManager.java | 37 +
.../pinot/thirdeye/datalayer/bao/JobManager.java | 46 +
.../datalayer/bao/MergedAnomalyResultManager.java | 81 +
.../datalayer/bao/MetricConfigManager.java | 38 +
.../datalayer/bao/OnboardDatasetMetricManager.java | 38 +
.../datalayer/bao/OverrideConfigManager.java | 31 +
.../datalayer/bao/RawAnomalyResultManager.java | 26 +
.../datalayer/bao/RootcauseSessionManager.java | 36 +
.../thirdeye/datalayer/bao/SessionManager.java | 29 +
.../pinot/thirdeye/datalayer/bao/TaskManager.java | 51 +
.../datalayer/bao/jdbc/AbstractManagerImpl.java | 201 +
.../datalayer/bao/jdbc/AlertConfigManagerImpl.java | 100 +
.../bao/jdbc/AlertSnapshotManagerImpl.java | 33 +
.../bao/jdbc/AnomalyFunctionManagerImpl.java | 133 +
.../datalayer/bao/jdbc/ApplicationManagerImpl.java | 47 +
.../bao/jdbc/AutotuneConfigManagerImpl.java | 76 +
.../bao/jdbc/ClassificationConfigManagerImpl.java | 55 +
.../datalayer/bao/jdbc/ConfigManagerImpl.java | 63 +
.../jdbc/DataCompletenessConfigManagerImpl.java | 119 +
.../bao/jdbc/DatasetConfigManagerImpl.java | 63 +
.../bao/jdbc/DetectionAlertConfigManagerImpl.java | 33 +
.../bao/jdbc/DetectionConfigManagerImpl.java | 70 +
.../bao/jdbc/DetectionStatusManagerImpl.java | 67 +
.../bao/jdbc/EntityToEntityMappingManagerImpl.java | 82 +
.../datalayer/bao/jdbc/EventManagerImpl.java | 54 +
.../bao/jdbc/GroupedAnomalyResultsManagerImpl.java | 154 +
.../datalayer/bao/jdbc/JobManagerImpl.java | 162 +
.../bao/jdbc/MergedAnomalyResultManagerImpl.java | 505 +++
.../bao/jdbc/MetricConfigManagerImpl.java | 130 +
.../bao/jdbc/OnboardDatasetMetricManagerImpl.java | 102 +
.../bao/jdbc/OverrideConfigManagerImpl.java | 54 +
.../bao/jdbc/RawAnomalyResultManagerImpl.java | 116 +
.../bao/jdbc/RootcauseSessionManagerImpl.java | 116 +
.../datalayer/bao/jdbc/SessionManagerImpl.java | 44 +
.../datalayer/bao/jdbc/TaskManagerImpl.java | 190 +
.../thirdeye/datalayer/dao/GenericPojoDao.java | 831 ++++
.../pinot/thirdeye/datalayer/dto/AbstractDTO.java | 67 +
.../thirdeye/datalayer/dto/AlertConfigDTO.java | 26 +
.../thirdeye/datalayer/dto/AlertSnapshotDTO.java | 146 +
.../thirdeye/datalayer/dto/AnomalyFeedbackDTO.java | 47 +
.../thirdeye/datalayer/dto/AnomalyFunctionDTO.java | 108 +
.../thirdeye/datalayer/dto/ApplicationDTO.java | 25 +
.../thirdeye/datalayer/dto/AutotuneConfigDTO.java | 78 +
.../datalayer/dto/ClassificationConfigDTO.java | 25 +
.../pinot/thirdeye/datalayer/dto/ConfigDTO.java | 27 +
.../datalayer/dto/DataCompletenessConfigDTO.java | 26 +
.../thirdeye/datalayer/dto/DatasetConfigDTO.java | 49 +
.../datalayer/dto/DetectionAlertConfigDTO.java | 27 +
.../thirdeye/datalayer/dto/DetectionConfigDTO.java | 38 +
.../thirdeye/datalayer/dto/DetectionStatusDTO.java | 26 +
.../datalayer/dto/EntityToEntityMappingDTO.java | 26 +
.../pinot/thirdeye/datalayer/dto/EventDTO.java | 26 +
.../datalayer/dto/GroupedAnomalyResultsDTO.java | 57 +
.../pinot/thirdeye/datalayer/dto/JobDTO.java | 32 +
.../datalayer/dto/MergedAnomalyResultDTO.java | 114 +
.../thirdeye/datalayer/dto/MetricConfigDTO.java | 37 +
.../datalayer/dto/OnboardDatasetMetricDTO.java | 26 +
.../thirdeye/datalayer/dto/OverrideConfigDTO.java | 29 +
.../datalayer/dto/RawAnomalyResultDTO.java | 63 +
.../datalayer/dto/RootcauseSessionDTO.java | 27 +
.../pinot/thirdeye/datalayer/dto/SessionDTO.java | 28 +
.../pinot/thirdeye/datalayer/dto/TaskDTO.java | 33 +
.../thirdeye/datalayer/entity/AbstractEntity.java | 101 +
.../datalayer/entity/AbstractIndexEntity.java | 33 +
.../datalayer/entity/AbstractJsonEntity.java | 44 +
.../datalayer/entity/AbstractMappingEntity.java | 36 +
.../datalayer/entity/AlertConfigIndex.java | 50 +
.../datalayer/entity/AlertSnapshotIndex.java | 24 +
.../datalayer/entity/AnomalyFeedbackIndex.java | 32 +
.../datalayer/entity/AnomalyFunctionIndex.java | 68 +
.../datalayer/entity/ApplicationIndex.java | 42 +
.../datalayer/entity/AutotuneConfigIndex.java | 77 +
.../entity/ClassificationConfigIndex.java | 41 +
.../thirdeye/datalayer/entity/ConfigIndex.java | 41 +
.../entity/DataCompletenessConfigIndex.java | 69 +
.../datalayer/entity/DatasetConfigIndex.java | 46 +
.../entity/DetectionAlertConfigIndex.java | 43 +
.../datalayer/entity/DetectionConfigIndex.java | 32 +
.../datalayer/entity/DetectionStatusIndex.java | 79 +
.../entity/EntityToEntityMappingIndex.java | 47 +
.../thirdeye/datalayer/entity/EventIndex.java | 79 +
.../datalayer/entity/GenericJsonEntity.java | 24 +
.../entity/GroupedAnomalyResultsIndex.java | 52 +
.../pinot/thirdeye/datalayer/entity/JobIndex.java | 82 +
.../datalayer/entity/MergedAnomalyResultIndex.java | 118 +
.../datalayer/entity/MetricConfigIndex.java | 59 +
.../entity/OnboardDatasetMetricIndex.java | 53 +
.../datalayer/entity/OverrideConfigIndex.java | 59 +
.../datalayer/entity/RawAnomalyResultIndex.java | 98 +
.../datalayer/entity/RootcauseSessionIndex.java | 95 +
.../thirdeye/datalayer/entity/SessionIndex.java | 44 +
.../pinot/thirdeye/datalayer/entity/TaskIndex.java | 95 +
.../thirdeye/datalayer/pojo/AbstractBean.java | 29 +
.../thirdeye/datalayer/pojo/AlertConfigBean.java | 404 ++
.../thirdeye/datalayer/pojo/AlertSnapshotBean.java | 64 +
.../datalayer/pojo/AnomalyFeedbackBean.java | 67 +
.../datalayer/pojo/AnomalyFunctionBean.java | 375 ++
.../thirdeye/datalayer/pojo/ApplicationBean.java | 44 +
.../datalayer/pojo/AutotuneConfigBean.java | 174 +
.../datalayer/pojo/ClassificationConfigBean.java | 141 +
.../pinot/thirdeye/datalayer/pojo/ConfigBean.java | 82 +
.../datalayer/pojo/DataCompletenessConfigBean.java | 131 +
.../thirdeye/datalayer/pojo/DatasetConfigBean.java | 327 ++
.../datalayer/pojo/DetectionAlertConfigBean.java | 191 +
.../datalayer/pojo/DetectionConfigBean.java | 118 +
.../datalayer/pojo/DetectionStatusBean.java | 99 +
.../datalayer/pojo/EntityToEntityMappingBean.java | 80 +
.../pinot/thirdeye/datalayer/pojo/EventBean.java | 120 +
.../datalayer/pojo/GroupedAnomalyResultsBean.java | 73 +
.../pinot/thirdeye/datalayer/pojo/JobBean.java | 133 +
.../datalayer/pojo/MergedAnomalyResultBean.java | 280 ++
.../thirdeye/datalayer/pojo/MetricConfigBean.java | 277 ++
.../datalayer/pojo/OnboardDatasetMetricBean.java | 106 +
.../datalayer/pojo/OverrideConfigBean.java | 112 +
.../datalayer/pojo/RawAnomalyResultBean.java | 223 ++
.../datalayer/pojo/RootcauseSessionBean.java | 217 +
.../pinot/thirdeye/datalayer/pojo/SessionBean.java | 96 +
.../pinot/thirdeye/datalayer/pojo/TaskBean.java | 144 +
.../pinot/thirdeye/datalayer/util/BeanInfo.java | 33 +
.../thirdeye/datalayer/util/DaoProviderUtil.java | 241 ++
.../datalayer/util/EntityMappingHolder.java | 123 +
.../datalayer/util/GenericResultSetMapper.java | 216 +
.../pinot/thirdeye/datalayer/util/IndexInfo.java | 31 +
.../thirdeye/datalayer/util/ManagerProvider.java | 51 +
.../thirdeye/datalayer/util/PersistenceConfig.java | 95 +
.../pinot/thirdeye/datalayer/util/Predicate.java | 117 +
.../thirdeye/datalayer/util/SqlQueryBuilder.java | 503 +++
.../pinot/thirdeye/datalayer/util/StringUtils.java | 70 +
.../dataset/DatasetAutoOnboardResource.java | 63 +
.../thirdeye/datasource/BaseThirdEyeResponse.java | 93 +
.../pinot/thirdeye/datasource/DAORegistry.java | 192 +
.../thirdeye/datasource/DataSourceConfig.java | 68 +
.../pinot/thirdeye/datasource/DataSources.java | 47 +
.../thirdeye/datasource/DataSourcesLoader.java | 85 +
.../thirdeye/datasource/MetadataSourceConfig.java | 65 +
.../thirdeye/datasource/MetricExpression.java | 165 +
.../pinot/thirdeye/datasource/MetricFunction.java | 136 +
.../thirdeye/datasource/ResponseParserUtils.java | 164 +
.../thirdeye/datasource/ThirdEyeCacheRegistry.java | 180 +
.../thirdeye/datasource/ThirdEyeDataSource.java | 57 +
.../pinot/thirdeye/datasource/ThirdEyeRequest.java | 284 ++
.../thirdeye/datasource/ThirdEyeResponse.java | 50 +
.../thirdeye/datasource/ThirdEyeResponseRow.java | 49 +
.../pinot/thirdeye/datasource/TimeRangeUtils.java | 150 +
.../datasource/cache/DatasetConfigCacheLoader.java | 45 +
.../datasource/cache/DatasetListCache.java | 72 +
.../cache/DatasetMaxDataTimeCacheLoader.java | 91 +
.../cache/DimensionFiltersCacheLoader.java | 73 +
.../datasource/cache/MetricConfigCacheLoader.java | 48 +
.../thirdeye/datasource/cache/MetricDataset.java | 64 +
.../thirdeye/datasource/cache/QueryCache.java | 91 +
.../pinot/thirdeye/datasource/comparison/Row.java | 161 +
.../comparison/ThirdEyeRequestGenerator.java | 101 +
.../comparison/TimeOnTimeComparisonHandler.java | 180 +
.../comparison/TimeOnTimeComparisonRequest.java | 179 +
.../comparison/TimeOnTimeComparisonResponse.java | 77 +
.../datasource/comparison/TimeOnTimeConstants.java | 30 +
.../comparison/TimeOnTimeResponseParser.java | 375 ++
.../datasource/csv/CSVThirdEyeDataSource.java | 393 ++
.../datasource/csv/CSVThirdEyeResponse.java | 149 +
.../datasource/loader/AggregationLoader.java | 54 +
.../loader/DefaultAggregationLoader.java | 155 +
.../datasource/loader/DefaultTimeSeriesLoader.java | 62 +
.../datasource/loader/TimeSeriesLoader.java | 47 +
.../datasource/mock/AutoOnboardMockDataSource.java | 184 +
.../datasource/mock/MockThirdEyeDataSource.java | 444 ++
.../pinot/PinotControllerResponseCacheLoader.java | 210 +
.../pinot/PinotDataSourceDimensionFilters.java | 175 +
.../datasource/pinot/PinotDataSourceMaxTime.java | 118 +
.../thirdeye/datasource/pinot/PinotQuery.java | 67 +
.../datasource/pinot/PinotResponseCacheLoader.java | 35 +
.../datasource/pinot/PinotThirdEyeDataSource.java | 599 +++
.../pinot/PinotThirdEyeDataSourceConfig.java | 327 ++
.../datasource/pinot/PinotThirdEyeResponse.java | 104 +
.../pinot/PinotThirdeyeDataSourceProperties.java | 42 +
.../pinot/thirdeye/datasource/pinot/PqlUtils.java | 513 +++
.../pinot/resources/PinotDataSourceResource.java | 103 +
.../pinot/resultset/AbstractThirdEyeResultSet.java | 43 +
.../resultset/ThirdEyeDataFrameResultSet.java | 195 +
.../pinot/resultset/ThirdEyeResultSet.java | 53 +
.../resultset/ThirdEyeResultSetDeserializer.java | 75 +
.../pinot/resultset/ThirdEyeResultSetGroup.java | 89 +
.../pinot/resultset/ThirdEyeResultSetMetaData.java | 84 +
.../resultset/ThirdEyeResultSetSerializer.java | 78 +
.../AnomalyDetectionTimeSeriesResponseParser.java | 73 +
.../timeseries/BaseTimeSeriesResponseParser.java | 138 +
.../datasource/timeseries/TimeSeriesHandler.java | 211 +
.../datasource/timeseries/TimeSeriesRequest.java | 168 +
.../datasource/timeseries/TimeSeriesResponse.java | 80 +
.../timeseries/TimeSeriesResponseConverter.java | 143 +
.../timeseries/TimeSeriesResponseParser.java | 27 +
.../datasource/timeseries/TimeSeriesRow.java | 220 +
.../timeseries/UITimeSeriesResponseParser.java | 179 +
.../pinot/thirdeye/detection/ConfigUtils.java | 246 ++
.../detection/CurrentAndBaselineLoader.java | 141 +
.../pinot/thirdeye/detection/DataProvider.java | 151 +
.../thirdeye/detection/DefaultDataProvider.java | 255 ++
.../detection/DefaultInputDataFetcher.java | 90 +
.../detection/DetectionMigrationResource.java | 313 ++
.../thirdeye/detection/DetectionPipeline.java | 269 ++
.../thirdeye/detection/DetectionPipelineJob.java | 100 +
.../detection/DetectionPipelineLoader.java | 34 +
.../detection/DetectionPipelineResult.java | 81 +
.../detection/DetectionPipelineScheduler.java | 171 +
.../detection/DetectionPipelineTaskInfo.java | 63 +
.../detection/DetectionPipelineTaskRunner.java | 137 +
.../thirdeye/detection/DetectionResource.java | 430 ++
.../pinot/thirdeye/detection/DetectionUtils.java | 166 +
.../pinot/thirdeye/detection/InputDataFetcher.java | 35 +
.../detection/LegacyAnomalyFunctionTranslator.java | 94 +
.../apache/pinot/thirdeye/detection/Pattern.java | 29 +
.../detection/StaticDetectionPipeline.java | 91 +
.../pinot/thirdeye/detection/alert/AlertUtils.java | 138 +
.../detection/alert/DetectionAlertFilter.java | 63 +
.../alert/DetectionAlertFilterRecipients.java | 105 +
.../alert/DetectionAlertFilterResult.java | 94 +
.../detection/alert/DetectionAlertJob.java | 86 +
.../detection/alert/DetectionAlertScheduler.java | 185 +
.../detection/alert/DetectionAlertTaskFactory.java | 126 +
.../detection/alert/DetectionAlertTaskInfo.java | 46 +
.../detection/alert/DetectionAlertTaskRunner.java | 142 +
.../alert/StatefulDetectionAlertFilter.java | 97 +
.../filter/DimensionDetectionAlertFilter.java | 120 +
.../detection/alert/filter/LegacyAlertFilter.java | 141 +
.../ToAllRecipientsDetectionAlertFilter.java | 87 +
.../alert/scheme/DetectionAlertScheme.java | 37 +
.../alert/scheme/DetectionEmailAlerter.java | 191 +
.../alert/suppress/DetectionAlertSuppressor.java | 41 +
.../DetectionAlertTimeWindowSuppressor.java | 148 +
.../detection/algorithm/AlgorithmUtils.java | 381 ++
.../detection/algorithm/BaselineAlgorithm.java | 139 +
.../algorithm/BaselineRuleFilterWrapper.java | 117 +
.../detection/algorithm/DimensionWrapper.java | 242 ++
.../algorithm/LegacyAlertFilterWrapper.java | 130 +
.../algorithm/LegacyAnomalyFunctionAlgorithm.java | 191 +
.../algorithm/LegacyDimensionWrapper.java | 126 +
.../detection/algorithm/LegacyMergeWrapper.java | 403 ++
.../thirdeye/detection/algorithm/MergeWrapper.java | 242 ++
.../detection/algorithm/MovingWindowAlgorithm.java | 759 ++++
.../algorithm/RuleBasedFilterWrapper.java | 90 +
.../detection/algorithm/ThresholdAlgorithm.java | 93 +
.../algorithm/ThresholdRuleFilterWrapper.java | 73 +
.../algorithm/stage/AnomalyDetectionStage.java | 38 +
.../stage/AnomalyDetectionStageWrapper.java | 216 +
.../algorithm/stage/AnomalyFilterStage.java | 38 +
.../algorithm/stage/AnomalyFilterStageWrapper.java | 104 +
.../algorithm/stage/BaseDetectionStage.java | 36 +
.../stage/BaselineRuleDetectionStage.java | 144 +
.../algorithm/stage/BaselineRuleFilterStage.java | 119 +
.../detection/algorithm/stage/GrouperStage.java | 38 +
.../algorithm/stage/GrouperStageWrapper.java | 88 +
.../stage/StaticAnomalyDetectionStage.java | 169 +
.../algorithm/stage/StaticAnomalyFilterStage.java | 62 +
.../stage/ThresholdRuleDetectionStage.java | 107 +
.../algorithm/stage/ThresholdRuleFilterStage.java | 90 +
.../thirdeye/detection/annotation/AlertScheme.java | 35 +
.../detection/annotation/AlertSuppressor.java | 35 +
.../thirdeye/detection/annotation/Components.java | 52 +
.../annotation/DetectionConfigurationResource.java | 41 +
.../detection/annotation/DetectionTag.java | 27 +
.../pinot/thirdeye/detection/annotation/Param.java | 44 +
.../detection/annotation/PresentationOption.java | 39 +
.../pinot/thirdeye/detection/annotation/Tune.java | 39 +
.../pinot/thirdeye/detection/annotation/Yaml.java | 39 +
.../registry/DetectionAlertRegistry.java | 110 +
.../annotation/registry/DetectionRegistry.java | 151 +
.../AbsoluteChangeRuleAnomalyFilter.java | 100 +
.../components/AbsoluteChangeRuleDetector.java | 115 +
.../PercentageChangeRuleAnomalyFilter.java | 99 +
.../components/PercentageChangeRuleDetector.java | 124 +
.../detection/components/RuleBaselineProvider.java | 71 +
.../SitewideImpactRuleAnomalyFilter.java | 148 +
.../components/ThresholdRuleAnomalyFilter.java | 80 +
.../components/ThresholdRuleDetector.java | 95 +
.../detection/finetune/F1ScoreFunction.java | 72 +
.../finetune/GridSearchTuningAlgorithm.java | 189 +
.../thirdeye/detection/finetune/ScoreFunction.java | 29 +
.../finetune/TimeBucketF1ScoreFunction.java | 106 +
.../detection/finetune/TuningAlgorithm.java | 44 +
.../spec/AbsoluteChangeRuleAnomalyFilterSpec.java | 59 +
.../spec/AbsoluteChangeRuleDetectorSpec.java | 59 +
.../thirdeye/detection/spec/AbstractSpec.java | 36 +
.../PercentageChangeRuleAnomalyFilterSpec.java | 59 +
.../spec/PercentageChangeRuleDetectorSpec.java | 59 +
.../detection/spec/RuleBaselineProviderSpec.java | 53 +
.../spec/SitewideImpactRuleAnomalyFilterSpec.java | 91 +
.../detection/spec/ThresholdRuleDetectorSpec.java | 45 +
.../detection/spec/ThresholdRuleFilterSpec.java | 45 +
.../detection/spi/components/AnomalyDetector.java | 37 +
.../detection/spi/components/AnomalyFilter.java | 32 +
.../detection/spi/components/BaseComponent.java | 28 +
.../detection/spi/components/BaselineProvider.java | 47 +
.../thirdeye/detection/spi/components/Grouper.java | 37 +
.../thirdeye/detection/spi/components/Tunable.java | 39 +
.../thirdeye/detection/spi/model/AnomalySlice.java | 88 +
.../thirdeye/detection/spi/model/EventSlice.java | 90 +
.../thirdeye/detection/spi/model/InputData.java | 121 +
.../detection/spi/model/InputDataSpec.java | 180 +
.../thirdeye/detection/spi/model/TimeSeries.java | 67 +
.../detection/validators/ConfigValidator.java | 67 +
.../validators/DetectionAlertConfigValidator.java | 128 +
.../detection/wrapper/AnomalyDetectorWrapper.java | 246 ++
.../detection/wrapper/AnomalyFilterWrapper.java | 97 +
.../wrapper/BaselineFillingMergeWrapper.java | 187 +
.../wrapper/ChildKeepingMergeWrapper.java | 118 +
.../yaml/CompositePipelineConfigTranslator.java | 474 +++
.../yaml/YamlDetectionAlertConfigTranslator.java | 223 ++
.../yaml/YamlDetectionConfigTranslator.java | 102 +
.../yaml/YamlDetectionTranslatorLoader.java | 42 +
.../thirdeye/detection/yaml/YamlResource.java | 503 +++
.../detection/yaml/YamlTranslationResult.java | 87 +
.../detector/email/AnomalyGraphGenerator.java | 338 ++
.../detector/email/filter/AlertFilter.java | 33 +
.../detector/email/filter/AlertFilterFactory.java | 105 +
.../email/filter/AlphaBetaAlertFilter.java | 110 +
.../filter/AverageChangeThresholdAlertFilter.java | 57 +
.../detector/email/filter/BaseAlertFilter.java | 123 +
.../detector/email/filter/DummyAlertFilter.java | 49 +
.../email/filter/PrecisionRecallEvaluator.java | 283 ++
.../detector/email/filter/UserReportUtils.java | 64 +
.../email/filter/WeightThresholdAlertFilter.java | 89 +
.../detector/function/AnomalyFunction.java | 177 +
.../detector/function/AnomalyFunctionFactory.java | 111 +
.../detector/function/BaseAnomalyFunction.java | 197 +
.../detector/metric/transfer/MetricTransfer.java | 163 +
.../detector/metric/transfer/ScalingFactor.java | 65 +
.../apache/pinot/thirdeye/rootcause/Entity.java | 105 +
.../pinot/thirdeye/rootcause/MaxScoreSet.java | 185 +
.../apache/pinot/thirdeye/rootcause/Pipeline.java | 59 +
.../pinot/thirdeye/rootcause/PipelineCallable.java | 81 +
.../pinot/thirdeye/rootcause/PipelineContext.java | 66 +
.../pinot/thirdeye/rootcause/PipelineResult.java | 45 +
.../pinot/thirdeye/rootcause/RCAFramework.java | 180 +
.../rootcause/RCAFrameworkExecutionResult.java | 75 +
.../pinot/thirdeye/rootcause/StaticPipeline.java | 88 +
.../rootcause/callgraph/CallGraphEntity.java | 76 +
.../callgraph/CallGraphEntityFormatter.java | 56 +
.../rootcause/callgraph/CallGraphPipeline.java | 418 ++
.../rootcause/impl/AnomalyContextPipeline.java | 147 +
.../rootcause/impl/AnomalyEventEntity.java | 62 +
.../rootcause/impl/AnomalyEventsPipeline.java | 243 ++
.../thirdeye/rootcause/impl/DatasetEntity.java | 77 +
.../rootcause/impl/DimensionAnalysisPipeline.java | 318 ++
.../thirdeye/rootcause/impl/DimensionEntity.java | 129 +
.../thirdeye/rootcause/impl/DimensionsEntity.java | 81 +
.../thirdeye/rootcause/impl/EmptyPipeline.java | 61 +
.../rootcause/impl/EntityMappingPipeline.java | 324 ++
.../pinot/thirdeye/rootcause/impl/EntityType.java | 92 +
.../pinot/thirdeye/rootcause/impl/EventEntity.java | 69 +
.../thirdeye/rootcause/impl/HyperlinkEntity.java | 56 +
.../rootcause/impl/LinearAggregationPipeline.java | 142 +
.../rootcause/impl/MaxAggregationPipeline.java | 76 +
.../rootcause/impl/MetricAnalysisPipeline.java | 371 ++
.../rootcause/impl/MetricAnalysisPipeline2.java | 429 ++
.../rootcause/impl/MetricBreakdownPipeline.java | 292 ++
.../impl/MetricComponentAnalysisPipeline.java | 343 ++
.../impl/MetricCorrelationRankingPipeline.java | 408 ++
.../rootcause/impl/MetricDatasetPipeline.java | 183 +
.../rootcause/impl/MetricDimensionPipeline.java | 72 +
.../thirdeye/rootcause/impl/MetricEntity.java | 105 +
.../rootcause/impl/MetricMappingPipeline.java | 292 ++
.../rootcause/impl/NormalizationPipeline.java | 61 +
.../thirdeye/rootcause/impl/NullPipeline.java | 61 +
.../rootcause/impl/PipelineConfiguration.java | 62 +
.../thirdeye/rootcause/impl/RCAConfiguration.java | 41 +
.../rootcause/impl/RCAFrameworkLoader.java | 115 +
.../rootcause/impl/RCAFrameworkRunner.java | 334 ++
.../thirdeye/rootcause/impl/ServiceEntity.java | 72 +
.../rootcause/impl/ThirdEyeEventEntity.java | 63 +
.../rootcause/impl/ThirdEyeEventsPipeline.java | 251 ++
.../thirdeye/rootcause/impl/TimeRangeEntity.java | 125 +
.../thirdeye/rootcause/impl/TopKPipeline.java | 85 +
.../thirdeye/rootcause/timeseries/Baseline.java | 62 +
.../rootcause/timeseries/BaselineAggregate.java | 553 +++
.../timeseries/BaselineAggregateType.java | 50 +
.../rootcause/timeseries/BaselineNone.java | 45 +
.../rootcause/timeseries/BaselineOffset.java | 86 +
.../pinot/thirdeye/rootcause/util/EntityUtils.java | 554 +++
.../thirdeye/rootcause/util/FilterPredicate.java | 54 +
.../pinot/thirdeye/rootcause/util/ParsedUrn.java | 110 +
.../pinot/thirdeye/rootcause/util/ScoreUtils.java | 244 ++
.../apache/pinot/thirdeye/tracking/RequestLog.java | 233 ++
.../pinot/thirdeye/tracking/RequestLogEntry.java | 78 +
.../pinot/thirdeye/tracking/RequestStatistics.java | 212 +
.../tracking/RequestStatisticsFormatter.java | 133 +
.../thirdeye/tracking/RequestStatisticsLogger.java | 65 +
.../apache/pinot/thirdeye/util/AnomalyOffset.java | 45 +
.../thirdeye/util/CustomDateDeserializer.java | 40 +
.../pinot/thirdeye/util/CustomDateSerializer.java | 39 +
.../thirdeye/util/CustomListDateDeserializer.java | 47 +
.../thirdeye/util/CustomListDateSerializer.java | 41 +
.../apache/pinot/thirdeye/util/IntervalUtils.java | 87 +
.../pinot/thirdeye/util/JodaDateTimeUtils.java | 51 +
.../pinot/thirdeye/util/JsonResponseUtil.java | 65 +
.../apache/pinot/thirdeye/util/NumberUtils.java | 178 +
.../thirdeye/util/SeverityComputationUtil.java | 118 +
.../apache/pinot/thirdeye/util/ThirdEyeUtils.java | 523 +++
.../pinot/thirdeye/util/TimeSeriesUtils.java | 96 +
.../datasource/mock/data-sources-config.yml | 191 -
.../dashboard/views/admin/dataset-config.ftl | 0
.../thirdeye/dashboard/views/admin/job-info.ftl | 0
.../dashboard/views/admin/metric-config.ftl | 0
.../dashboard/views/admin/thirdeye-admin.ftl | 0
.../pinot}/thirdeye/dashboard/views/dashboard.ftl | 0
.../pinot}/thirdeye/dashboard/views/head.ftl | 0
.../thirdeye/dashboard/views/tabs/analysis.ftl | 0
.../views/tabs/analysis/analysis-options.ftl | 0
.../tabs/analysis/dimension-tree-map-graph.ftl | 0
.../views/tabs/analysis/dimension-tree-map.ftl | 0
.../tabs/analysis/percentage-change-table.ftl | 0
.../views/tabs/analysis/rootcause-table.ftl | 0
.../views/tabs/analysis/timeseries-contributor.ftl | 0
.../analysis/timeseries-subdimension-legend.ftl | 0
.../views/tabs/anomalies-tab/anomalies.ftl | 0
.../views/tabs/anomalies-tab/anomaly-details.ftl | 0
.../thirdeye/dashboard/views/tabs/anomalies.ftl | 0
.../views/tabs/anomaly-filters-wrapper.ftl | 0
.../dashboard/views/tabs/anomaly-filters.ftl | 0
.../dashboard/views/tabs/anomaly-results.ftl | 0
.../dashboard/views/tabs/common/chart-area.ftl | 0
.../dashboard/views/tabs/common/dataset-list.ftl | 0
.../dashboard/views/tabs/common/filter-value.ftl | 0
.../thirdeye/dashboard/views/tabs/common/form.ftl | 0
.../dashboard/views/tabs/common/metric-list.ftl | 0
.../views/tabs/compare-tab/contributors.ftl | 0
.../views/tabs/compare-tab/difference-summary.ftl | 0
.../views/tabs/compare-tab/heat-map-summary.ftl | 0
.../dashboard/views/tabs/compare-tab/heat-map.ftl | 0
.../views/tabs/compare-tab/metric-timeseries.ftl | 0
.../dashboard/views/tabs/compare-tab/tabular.ftl | 0
.../thirdeye/dashboard/views/tabs/dashboard.ftl | 0
.../views/tabs/dashboard/add-metrics-modal.ftl | 0
.../tabs/dashboard/anomaly-summary-dashboard.ftl | 0
.../tabs/dashboard/manage-dashboard-modal.ftl | 0
.../tabs/dashboard/metric-summary-dashboard.ftl | 0
.../views/tabs/dashboard/new-dashboard-modal.ftl | 0
.../views/tabs/dashboard/wow-summary-dashboard.ftl | 0
.../thirdeye/dashboard/views/tabs/investigate.ftl | 0
.../self-service-tab/anomaly-function-form.ftl | 0
.../tabs/self-service-tab/self-service-email.ftl | 0
.../self-service-existing-functions.ftl | 0
.../views/tabs/self-service-tab/self-service.ftl | 0
.../pinot}/thirdeye/dashboard/views/tabs/tab.ftl | 0
.../views/tabs/timeseries-tab/timeseries.ftl | 0
.../pinot}/thirdeye/dashboard/views/thirdeye.ftl | 0
.../datasource/mock/data-sources-config.yml | 191 +
.../pinot}/thirdeye/detector/anomaly-report-v2.ftl | 0
.../pinot}/thirdeye/detector/anomaly-report.ftl | 0
.../thirdeye/detector/custom-anomaly-report.ftl | 0
.../detector/data-report-by-metric-dimension.ftl | 0
.../hierarchical-anomalies-email-template.ftl | 0
.../thirdeye/detector/holiday-anomaly-report.ftl | 0
.../apache/pinot}/thirdeye/detector/lib/utils.ftl | 0
.../detector/multiple-anomalies-email-template.ftl | 0
.../onboard-notification-email-template.ftl | 0
.../detector/single-anomaly-email-template.ftl | 0
.../src/main/resources/sample-rca-config.yml | 40 +-
.../java/com/linkedin/thirdeye/TestListener.java | 61 -
.../alert/commons/TestAnomalyFeedFactory.java | 32 -
.../alert/commons/TestAnomalyFetcherFactory.java | 38 -
.../commons/TestEmailContentFormatterFactory.java | 39 -
...HierarchicalAnomaliesEmailContentFormatter.java | 175 -
...TestMultipleAnomaliesEmailContentFormatter.java | 156 -
...TestOnboardingNotificationContentFormatter.java | 151 -
.../thirdeye/alert/feed/TestUnionAnomalyFeed.java | 104 -
.../alert/fetcher/TestContinuumAnomalyFetcher.java | 86 -
.../fetcher/TestUnnotifiedAnomalyFetcher.java | 81 -
.../anomaly/alert/grouping/AlertGroupKeyTest.java | 43 -
.../alert/grouping/AlertGrouperFactoryTest.java | 38 -
.../grouping/DimensionalAlertGrouperTest.java | 169 -
.../alert/grouping/DummyAlertGrouperTest.java | 50 -
.../HorizontalDimensionalAlertGrouperTest.java | 165 -
...AlertGroupAuxiliaryInfoProviderFactoryTest.java | 40 -
...alAlertGroupAuxiliaryRecipientProviderTest.java | 104 -
.../filter/SizeSeverityAlertGroupFilterTest.java | 56 -
.../anomaly/detection/DetectionTaskRunnerTest.java | 113 -
.../detection/TestDetectionJobSchedulerUtils.java | 357 --
.../events/CustomizedEventResourceTest.java | 51 -
.../anomaly/events/HolidayEventsLoaderTest.java | 127 -
.../thirdeye/anomaly/events/MockEventsManager.java | 92 -
.../merge/TestAnomalyTimeBasedSummarizer.java | 101 -
.../onboard/DetectionOnBoardJobRunnerTest.java | 344 --
.../onboard/DetectionOnboardResourceTest.java | 97 -
.../anomaly/onboard/OnboardingTaskTestUtils.java | 72 -
.../anomaly/onboard/tasks/TestOnboardingTasks.java | 128 -
.../anomaly/override/TestOverrideConfigHelper.java | 104 -
.../thirdeye/anomaly/utils/TestEmailUtils.java | 52 -
.../views/TestCondensedAnomalyTimelinesView.java | 107 -
.../datafilter/AverageThresholdDataFilterTest.java | 229 --
.../datafilter/DataFilterFactoryTest.java | 40 -
.../function/BackwardAnomalyFunctionUtilsTest.java | 96 -
.../function/TestAnomalyFunctionFactory.java | 67 -
.../function/TestBackwardAnoamlyFunctionUtils.java | 64 -
.../function/TestMinMaxThresholdFunction.java | 206 -
.../function/TestWeekOverWeekRuleFunction.java | 468 ---
.../transform/testAnomalyRemovalFunction.java | 51 -
.../linkedin/thirdeye/api/DimensionMapTest.java | 132 -
.../thirdeye/api/MetricTimeSeriesTest.java | 308 --
.../auth/ThirdEyeAuthenticatorLdapTest.java | 132 -
.../onboard/AutoOnboardAnotherDummyDataSource.java | 25 -
.../AutoOnboardAnotherRandomDataSource.java | 26 -
.../auto/onboard/AutoOnboardDummyDataSource.java | 25 -
.../AutoOnboardPinotMetricsServiceTest.java | 165 -
.../auto/onboard/AutoOnboardServiceTest.java | 48 -
.../auto/onboard/AutoOnboardUtilityTest.java | 73 -
.../thirdeye/client/diffsummary/CubeTest.java | 197 -
.../diffsummary/DimNameValueCostEntryTest.java | 37 -
.../client/diffsummary/DimensionValuesTest.java | 89 -
.../client/diffsummary/DimensionsTest.java | 107 -
.../client/diffsummary/HierarchyNodeTest.java | 182 -
.../MultiDimensionalSummaryCLIToolTest.java | 58 -
.../costfunctions/BalancedCostFunctionTest.java | 39 -
.../thirdeye/common/ThirdEyeConfigurationTest.java | 57 -
.../checker/DataCompletenessUtilsTest.java | 232 --
.../config/ConfigNamespaceIntegrationTest.java | 116 -
.../thirdeye/config/ConfigNamespaceTest.java | 134 -
.../com/linkedin/thirdeye/dashboard/UtilsTest.java | 56 -
.../dashboard/handler/ContributorTest.java | 70 -
.../thirdeye/dashboard/handler/HeatMapTest.java | 71 -
.../thirdeye/dashboard/handler/TabularTest.java | 70 -
.../dashboard/resource/AnomalyResourceTest.java | 135 -
.../dashboard/resource/OnboardResourceTest.java | 100 -
.../resource/v2/UserDashboardResourceTest.java | 177 -
.../thirdeye/dataframe/DataFrameBenchmark.java | 1198 ------
.../dataframe/DataFrameSerializerTest.java | 131 -
.../linkedin/thirdeye/dataframe/DataFrameTest.java | 4236 --------------------
.../dataframe/PrimitiveMultimapBenchmark.java | 472 ---
.../thirdeye/dataframe/PrimitiveMultimapTest.java | 231 --
.../thirdeye/datalayer/DAOProviderUtilTest.java | 47 -
.../linkedin/thirdeye/datalayer/DaoTestUtils.java | 352 --
.../thirdeye/datalayer/bao/DAOTestBase.java | 113 -
.../datalayer/bao/TestAlertConfigManager.java | 281 --
.../datalayer/bao/TestAlertSnapshotManager.java | 70 -
.../datalayer/bao/TestAnomalyFunctionManager.java | 120 -
.../datalayer/bao/TestAnomalyJobManager.java | 125 -
.../datalayer/bao/TestAnomalyTaskManager.java | 189 -
.../datalayer/bao/TestApplicationManager.java | 67 -
.../datalayer/bao/TestAutotuneConfigManager.java | 91 -
.../bao/TestClassificationJobConfigManager.java | 108 -
.../thirdeye/datalayer/bao/TestConfigManager.java | 102 -
.../bao/TestDataCompletenessConfigManager.java | 118 -
.../datalayer/bao/TestDatasetConfigManager.java | 99 -
.../datalayer/bao/TestDetectionStatusManager.java | 100 -
.../bao/TestEntityToEntityMappingManager.java | 116 -
.../thirdeye/datalayer/bao/TestEventManager.java | 84 -
.../bao/TestGroupedAnomalyResultsManager.java | 137 -
.../bao/TestMergedAnomalyResultManager.java | 207 -
.../datalayer/bao/TestMetricConfigManager.java | 134 -
.../bao/TestOnboardDatasetMetricManager.java | 108 -
.../datalayer/bao/TestOverrideConfigManager.java | 126 -
.../datalayer/bao/TestRawAnomalyResultManager.java | 102 -
.../datalayer/bao/TestRootcauseSessionManager.java | 259 --
.../datalayer/dto/DatasetConfigDTOTest.java | 57 -
.../thirdeye/datalayer/util/StringUtilsTest.java | 18 -
.../thirdeye/datasource/TestTimeRangeUtils.java | 64 -
.../datasource/comparison/TimeOnTimeTest.java | 148 -
.../csv/CSVThirdEyeDataSourceIntegrationTest.java | 92 -
.../datasource/csv/CSVThirdEyeDataSourceTest.java | 376 --
.../datasource/csv/CSVThirdEyeResponseTest.java | 84 -
.../MockThirdEyeDataSourceIntegrationTest.java | 205 -
.../mock/MockThirdEyeDataSourceTest.java | 157 -
.../pinot/PinotThirdEyeDataSourceConfigTest.java | 122 -
.../pinot/PinotThirdEyeDataSourceTest.java | 72 -
.../thirdeye/datasource/pinot/PqlUtilsTest.java | 208 -
.../resultset/ThirdEyeDataFrameResultSetTest.java | 397 --
.../ThirdEyeResultSetDeserializerTest.java | 112 -
.../resultset/ThirdEyeResultSetSerializerTest.java | 115 -
...omalyDetectionTimeSeriesResponseParserTest.java | 160 -
.../BaseTimeSeriesResponseParserTest.java | 286 --
.../timeseries/TestTimeSeriesResponseUtils.java | 243 --
.../datasource/timeseries/TimeSeriesTest.java | 143 -
.../thirdeye/detection/ConfigUtilsTest.java | 130 -
.../detection/CurrentAndBaselineLoaderTest.java | 141 -
.../thirdeye/detection/DataProviderTest.java | 393 --
.../detection/DefaultInputDataFetcherTest.java | 73 -
.../detection/DetectionPipelineTaskRunnerTest.java | 141 -
.../thirdeye/detection/DetectionTestUtils.java | 91 -
.../thirdeye/detection/MockDataProvider.java | 298 --
.../linkedin/thirdeye/detection/MockPipeline.java | 53 -
.../thirdeye/detection/MockPipelineLoader.java | 47 -
.../thirdeye/detection/MockPipelineOutput.java | 53 -
.../alert/DetectionAlertTaskFactoryTest.java | 128 -
.../thirdeye/detection/alert/SendAlertTest.java | 177 -
.../filter/DimensionDetectionAlertFilterTest.java | 203 -
.../alert/filter/LegacyAlertFilterTest.java | 110 -
.../ToAllRecipientsDetectionAlertFilterTest.java | 210 -
.../alert/scheme/AnotherRandomAlerter.java | 18 -
.../detection/alert/scheme/RandomAlerter.java | 18 -
.../DetectionTimeWindowSuppressorTest.java | 157 -
.../detection/algorithm/AlgorithmUtilsTest.java | 177 -
.../detection/algorithm/BaselineAlgorithmTest.java | 139 -
.../detection/algorithm/DimensionWrapperTest.java | 250 --
.../algorithm/LegacyAlertFilterWrapperTest.java | 109 -
.../LegacyAnomalyFunctionAlgorithmTest.java | 98 -
.../algorithm/LegacyDimensionWrapperTest.java | 154 -
.../algorithm/LegacyMergeWrapperTest.java | 137 -
.../detection/algorithm/MergeWrapperTest.java | 270 --
.../algorithm/MovingWindowAlgorithmTest.java | 414 --
.../algorithm/ThresholdAlgorithmTest.java | 89 -
.../stage/AnomalyDetectionStageWrapperTest.java | 87 -
.../stage/BaselineRuleDetectionStageTest.java | 141 -
.../algorithm/stage/BaselineRuleFilterTest.java | 173 -
.../stage/ThresholdRuleDetectionStageTest.java | 93 -
.../algorithm/stage/ThresholdRuleFilterTest.java | 167 -
.../AbsoluteChangeRuleAnomalyFilterTest.java | 90 -
.../components/AbsoluteChangeRuleDetectorTest.java | 94 -
.../detection/components/MockBaselineProvider.java | 45 -
.../PercentageChangeRuleAnomalyFilterTest.java | 89 -
.../PercentageChangeRuleDetectorTest.java | 151 -
.../components/RuleBaselineProviderTest.java | 89 -
.../SitewideImpactRuleAnomalyFilterTest.java | 105 -
.../components/ThresholdRuleAnomalyFilterTest.java | 170 -
.../components/ThresholdRuleDetectorTest.java | 97 -
.../finetune/GridSearchTuningAlgorithmTest.java | 134 -
.../LegacyEmulationIntegrationTest.java | 148 -
.../MergeDimensionThresholdIntegrationTest.java | 152 -
.../thirdeye/detection/spec/AbstractSpecTest.java | 69 -
.../detection/spec/MockBaselineProviderSpec.java | 43 -
.../linkedin/thirdeye/detection/spec/TestSpec.java | 70 -
.../wrapper/AnomalyDetectorWrapperTest.java | 107 -
.../wrapper/BaselineFillingMergeWrapperTest.java | 136 -
.../wrapper/ChildKeepingMergeWrapperTest.java | 266 --
.../CompositePipelineConfigTranslatorTest.java | 76 -
.../yaml/MockYamlDetectionConfigTranslator.java | 19 -
.../YamlDetectionAlertConfigTranslatorTest.java | 145 -
.../yaml/YamlDetectionConfigTranslatorTest.java | 52 -
.../thirdeye/detection/yaml/YamlResourceTest.java | 140 -
.../email/filter/TestAlertFilterFactory.java | 53 -
.../detector/email/filter/TestBaseAlertFilter.java | 57 -
.../email/filter/TestPrecisionRecallEvaluator.java | 161 -
.../detector/email/filter/TestUserReportUtils.java | 123 -
.../metric/transfer/testMetricTransfer.java | 79 -
.../thirdeye/eventprovider/TestEventFilter.java | 117 -
.../eventprovider/TestHolidayEventProvider.java | 124 -
.../AnomalyApplicationEndToEndTest.java | 435 --
.../thirdeye/rootcause/AbstractMockManager.java | 104 -
.../rootcause/EntityMappingPipelineTest.java | 237 --
.../thirdeye/rootcause/MaxScoreSetTest.java | 68 -
.../rootcause/MockDatasetConfigManager.java | 85 -
.../MockEntityToEntityMappingManager.java | 103 -
.../rootcause/MockMetricConfigManager.java | 97 -
.../thirdeye/rootcause/RCAFrameworkTest.java | 145 -
.../rootcause/impl/DimensionEntityTest.java | 54 -
.../rootcause/impl/DimensionsEntityTest.java | 91 -
.../thirdeye/rootcause/impl/EntityTypeTest.java | 71 -
.../thirdeye/rootcause/impl/MetricEntityTest.java | 129 -
.../rootcause/impl/MetricMappingPipelineTest.java | 178 -
.../rootcause/impl/RCAFrameworkLoaderTest.java | 47 -
.../rootcause/timeseries/BaselineTest.java | 283 --
.../thirdeye/rootcause/util/EntityUtilsTest.java | 219 -
.../thirdeye/rootcause/util/ScoreUtilsTest.java | 88 -
.../thirdeye/tools/AutoTuneAlertFilterTool.java | 224 --
.../tools/CleanupAndRegenerateAnomaliesConfig.java | 88 -
.../tools/CleanupAndRegenerateAnomaliesTool.java | 262 --
.../thirdeye/tools/DashboardHttpUtils.java | 71 -
.../tools/FetchAnomaliesInRangeAndOutputCSV.java | 126 -
.../thirdeye/tools/FetchAutoTuneResult.java | 166 -
.../FetchMetricDataAndExistingAnomaliesTool.java | 228 --
.../tools/FetchMetricDataInRangeAndOutputCSV.java | 147 -
.../tools/RunAdhocDatabaseQueriesTool.java | 447 ---
.../tools/anomaly/report/AnomalyReportConfig.java | 86 -
.../tools/anomaly/report/AnomalyReportDriver.java | 117 -
.../anomaly/report/GenerateAnomalyReport.java | 348 --
.../linkedin/thirdeye/tracking/RequestLogTest.java | 100 -
.../linkedin/thirdeye/util/ThirdEyeUtilsTest.java | 176 -
.../org/apache/pinot/thirdeye/TestListener.java | 61 +
.../alert/commons/TestAnomalyFeedFactory.java | 32 +
.../alert/commons/TestAnomalyFetcherFactory.java | 38 +
.../commons/TestEmailContentFormatterFactory.java | 39 +
...HierarchicalAnomaliesEmailContentFormatter.java | 175 +
...TestMultipleAnomaliesEmailContentFormatter.java | 156 +
...TestOnboardingNotificationContentFormatter.java | 151 +
.../thirdeye/alert/feed/TestUnionAnomalyFeed.java | 104 +
.../alert/fetcher/TestContinuumAnomalyFetcher.java | 86 +
.../fetcher/TestUnnotifiedAnomalyFetcher.java | 81 +
.../anomaly/alert/grouping/AlertGroupKeyTest.java | 43 +
.../alert/grouping/AlertGrouperFactoryTest.java | 38 +
.../grouping/DimensionalAlertGrouperTest.java | 169 +
.../alert/grouping/DummyAlertGrouperTest.java | 50 +
.../HorizontalDimensionalAlertGrouperTest.java | 165 +
...AlertGroupAuxiliaryInfoProviderFactoryTest.java | 40 +
...alAlertGroupAuxiliaryRecipientProviderTest.java | 104 +
.../filter/SizeSeverityAlertGroupFilterTest.java | 56 +
.../anomaly/detection/DetectionTaskRunnerTest.java | 113 +
.../detection/TestDetectionJobSchedulerUtils.java | 357 ++
.../events/CustomizedEventResourceTest.java | 51 +
.../anomaly/events/HolidayEventsLoaderTest.java | 127 +
.../thirdeye/anomaly/events/MockEventsManager.java | 92 +
.../merge/TestAnomalyTimeBasedSummarizer.java | 101 +
.../onboard/DetectionOnBoardJobRunnerTest.java | 344 ++
.../onboard/DetectionOnboardResourceTest.java | 97 +
.../anomaly/onboard/OnboardingTaskTestUtils.java | 72 +
.../anomaly/onboard/tasks/TestOnboardingTasks.java | 128 +
.../anomaly/override/TestOverrideConfigHelper.java | 104 +
.../thirdeye/anomaly/utils/TestEmailUtils.java | 52 +
.../views/TestCondensedAnomalyTimelinesView.java | 107 +
.../datafilter/AverageThresholdDataFilterTest.java | 229 ++
.../datafilter/DataFilterFactoryTest.java | 40 +
.../function/BackwardAnomalyFunctionUtilsTest.java | 96 +
.../function/TestAnomalyFunctionFactory.java | 67 +
.../function/TestBackwardAnoamlyFunctionUtils.java | 64 +
.../function/TestMinMaxThresholdFunction.java | 206 +
.../function/TestWeekOverWeekRuleFunction.java | 468 +++
.../transform/testAnomalyRemovalFunction.java | 51 +
.../pinot/thirdeye/api/DimensionMapTest.java | 132 +
.../pinot/thirdeye/api/MetricTimeSeriesTest.java | 308 ++
.../auth/ThirdEyeAuthenticatorLdapTest.java | 132 +
.../onboard/AutoOnboardAnotherDummyDataSource.java | 25 +
.../AutoOnboardAnotherRandomDataSource.java | 26 +
.../auto/onboard/AutoOnboardDummyDataSource.java | 25 +
.../AutoOnboardPinotMetricsServiceTest.java | 165 +
.../auto/onboard/AutoOnboardServiceTest.java | 48 +
.../auto/onboard/AutoOnboardUtilityTest.java | 73 +
.../thirdeye/client/diffsummary/CubeTest.java | 197 +
.../diffsummary/DimNameValueCostEntryTest.java | 37 +
.../client/diffsummary/DimensionValuesTest.java | 89 +
.../client/diffsummary/DimensionsTest.java | 107 +
.../client/diffsummary/HierarchyNodeTest.java | 182 +
.../MultiDimensionalSummaryCLIToolTest.java | 58 +
.../costfunctions/BalancedCostFunctionTest.java | 39 +
.../thirdeye/common/ThirdEyeConfigurationTest.java | 57 +
.../checker/DataCompletenessUtilsTest.java | 232 ++
.../config/ConfigNamespaceIntegrationTest.java | 116 +
.../pinot/thirdeye/config/ConfigNamespaceTest.java | 134 +
.../apache/pinot/thirdeye/dashboard/UtilsTest.java | 56 +
.../dashboard/handler/ContributorTest.java | 70 +
.../thirdeye/dashboard/handler/HeatMapTest.java | 71 +
.../thirdeye/dashboard/handler/TabularTest.java | 70 +
.../dashboard/resource/AnomalyResourceTest.java | 135 +
.../dashboard/resource/OnboardResourceTest.java | 100 +
.../resource/v2/UserDashboardResourceTest.java | 177 +
.../thirdeye/dataframe/DataFrameBenchmark.java | 1198 ++++++
.../dataframe/DataFrameSerializerTest.java | 131 +
.../pinot/thirdeye/dataframe/DataFrameTest.java | 4236 ++++++++++++++++++++
.../dataframe/PrimitiveMultimapBenchmark.java | 472 +++
.../thirdeye/dataframe/PrimitiveMultimapTest.java | 231 ++
.../thirdeye/datalayer/DAOProviderUtilTest.java | 47 +
.../pinot/thirdeye/datalayer/DaoTestUtils.java | 352 ++
.../pinot/thirdeye/datalayer/bao/DAOTestBase.java | 113 +
.../datalayer/bao/TestAlertConfigManager.java | 281 ++
.../datalayer/bao/TestAlertSnapshotManager.java | 70 +
.../datalayer/bao/TestAnomalyFunctionManager.java | 120 +
.../datalayer/bao/TestAnomalyJobManager.java | 125 +
.../datalayer/bao/TestAnomalyTaskManager.java | 189 +
.../datalayer/bao/TestApplicationManager.java | 67 +
.../datalayer/bao/TestAutotuneConfigManager.java | 91 +
.../bao/TestClassificationJobConfigManager.java | 108 +
.../thirdeye/datalayer/bao/TestConfigManager.java | 102 +
.../bao/TestDataCompletenessConfigManager.java | 118 +
.../datalayer/bao/TestDatasetConfigManager.java | 99 +
.../datalayer/bao/TestDetectionStatusManager.java | 100 +
.../bao/TestEntityToEntityMappingManager.java | 116 +
.../thirdeye/datalayer/bao/TestEventManager.java | 84 +
.../bao/TestGroupedAnomalyResultsManager.java | 137 +
.../bao/TestMergedAnomalyResultManager.java | 207 +
.../datalayer/bao/TestMetricConfigManager.java | 134 +
.../bao/TestOnboardDatasetMetricManager.java | 108 +
.../datalayer/bao/TestOverrideConfigManager.java | 126 +
.../datalayer/bao/TestRawAnomalyResultManager.java | 102 +
.../datalayer/bao/TestRootcauseSessionManager.java | 259 ++
.../datalayer/dto/DatasetConfigDTOTest.java | 57 +
.../thirdeye/datalayer/util/StringUtilsTest.java | 18 +
.../thirdeye/datasource/TestTimeRangeUtils.java | 64 +
.../datasource/comparison/TimeOnTimeTest.java | 148 +
.../csv/CSVThirdEyeDataSourceIntegrationTest.java | 92 +
.../datasource/csv/CSVThirdEyeDataSourceTest.java | 376 ++
.../datasource/csv/CSVThirdEyeResponseTest.java | 84 +
.../MockThirdEyeDataSourceIntegrationTest.java | 205 +
.../mock/MockThirdEyeDataSourceTest.java | 157 +
.../pinot/PinotThirdEyeDataSourceConfigTest.java | 122 +
.../pinot/PinotThirdEyeDataSourceTest.java | 72 +
.../thirdeye/datasource/pinot/PqlUtilsTest.java | 208 +
.../resultset/ThirdEyeDataFrameResultSetTest.java | 397 ++
.../ThirdEyeResultSetDeserializerTest.java | 112 +
.../resultset/ThirdEyeResultSetSerializerTest.java | 115 +
...omalyDetectionTimeSeriesResponseParserTest.java | 160 +
.../BaseTimeSeriesResponseParserTest.java | 286 ++
.../timeseries/TestTimeSeriesResponseUtils.java | 243 ++
.../datasource/timeseries/TimeSeriesTest.java | 143 +
.../pinot/thirdeye/detection/ConfigUtilsTest.java | 130 +
.../detection/CurrentAndBaselineLoaderTest.java | 141 +
.../pinot/thirdeye/detection/DataProviderTest.java | 393 ++
.../detection/DefaultInputDataFetcherTest.java | 73 +
.../detection/DetectionPipelineTaskRunnerTest.java | 141 +
.../thirdeye/detection/DetectionTestUtils.java | 91 +
.../pinot/thirdeye/detection/MockDataProvider.java | 298 ++
.../pinot/thirdeye/detection/MockPipeline.java | 53 +
.../thirdeye/detection/MockPipelineLoader.java | 47 +
.../thirdeye/detection/MockPipelineOutput.java | 53 +
.../alert/DetectionAlertTaskFactoryTest.java | 128 +
.../thirdeye/detection/alert/SendAlertTest.java | 177 +
.../filter/DimensionDetectionAlertFilterTest.java | 203 +
.../alert/filter/LegacyAlertFilterTest.java | 110 +
.../ToAllRecipientsDetectionAlertFilterTest.java | 210 +
.../alert/scheme/AnotherRandomAlerter.java | 18 +
.../detection/alert/scheme/RandomAlerter.java | 18 +
.../DetectionTimeWindowSuppressorTest.java | 157 +
.../detection/algorithm/AlgorithmUtilsTest.java | 177 +
.../detection/algorithm/BaselineAlgorithmTest.java | 139 +
.../detection/algorithm/DimensionWrapperTest.java | 250 ++
.../algorithm/LegacyAlertFilterWrapperTest.java | 109 +
.../LegacyAnomalyFunctionAlgorithmTest.java | 98 +
.../algorithm/LegacyDimensionWrapperTest.java | 154 +
.../algorithm/LegacyMergeWrapperTest.java | 137 +
.../detection/algorithm/MergeWrapperTest.java | 270 ++
.../algorithm/MovingWindowAlgorithmTest.java | 414 ++
.../algorithm/ThresholdAlgorithmTest.java | 89 +
.../stage/AnomalyDetectionStageWrapperTest.java | 87 +
.../stage/BaselineRuleDetectionStageTest.java | 141 +
.../algorithm/stage/BaselineRuleFilterTest.java | 173 +
.../stage/ThresholdRuleDetectionStageTest.java | 93 +
.../algorithm/stage/ThresholdRuleFilterTest.java | 167 +
.../AbsoluteChangeRuleAnomalyFilterTest.java | 90 +
.../components/AbsoluteChangeRuleDetectorTest.java | 94 +
.../detection/components/MockBaselineProvider.java | 45 +
.../PercentageChangeRuleAnomalyFilterTest.java | 89 +
.../PercentageChangeRuleDetectorTest.java | 151 +
.../components/RuleBaselineProviderTest.java | 89 +
.../SitewideImpactRuleAnomalyFilterTest.java | 105 +
.../components/ThresholdRuleAnomalyFilterTest.java | 170 +
.../components/ThresholdRuleDetectorTest.java | 97 +
.../finetune/GridSearchTuningAlgorithmTest.java | 134 +
.../LegacyEmulationIntegrationTest.java | 148 +
.../MergeDimensionThresholdIntegrationTest.java | 152 +
.../thirdeye/detection/spec/AbstractSpecTest.java | 69 +
.../detection/spec/MockBaselineProviderSpec.java | 43 +
.../pinot/thirdeye/detection/spec/TestSpec.java | 70 +
.../wrapper/AnomalyDetectorWrapperTest.java | 107 +
.../wrapper/BaselineFillingMergeWrapperTest.java | 136 +
.../wrapper/ChildKeepingMergeWrapperTest.java | 266 ++
.../CompositePipelineConfigTranslatorTest.java | 76 +
.../yaml/MockYamlDetectionConfigTranslator.java | 19 +
.../YamlDetectionAlertConfigTranslatorTest.java | 145 +
.../yaml/YamlDetectionConfigTranslatorTest.java | 52 +
.../thirdeye/detection/yaml/YamlResourceTest.java | 140 +
.../email/filter/TestAlertFilterFactory.java | 53 +
.../detector/email/filter/TestBaseAlertFilter.java | 57 +
.../email/filter/TestPrecisionRecallEvaluator.java | 161 +
.../detector/email/filter/TestUserReportUtils.java | 123 +
.../metric/transfer/testMetricTransfer.java | 79 +
.../thirdeye/eventprovider/TestEventFilter.java | 117 +
.../eventprovider/TestHolidayEventProvider.java | 124 +
.../AnomalyApplicationEndToEndTest.java | 435 ++
.../thirdeye/rootcause/AbstractMockManager.java | 104 +
.../rootcause/EntityMappingPipelineTest.java | 237 ++
.../pinot/thirdeye/rootcause/MaxScoreSetTest.java | 68 +
.../rootcause/MockDatasetConfigManager.java | 85 +
.../MockEntityToEntityMappingManager.java | 103 +
.../rootcause/MockMetricConfigManager.java | 97 +
.../pinot/thirdeye/rootcause/RCAFrameworkTest.java | 145 +
.../rootcause/impl/DimensionEntityTest.java | 54 +
.../rootcause/impl/DimensionsEntityTest.java | 91 +
.../thirdeye/rootcause/impl/EntityTypeTest.java | 71 +
.../thirdeye/rootcause/impl/MetricEntityTest.java | 129 +
.../rootcause/impl/MetricMappingPipelineTest.java | 178 +
.../rootcause/impl/RCAFrameworkLoaderTest.java | 47 +
.../rootcause/timeseries/BaselineTest.java | 283 ++
.../thirdeye/rootcause/util/EntityUtilsTest.java | 219 +
.../thirdeye/rootcause/util/ScoreUtilsTest.java | 88 +
.../thirdeye/tools/AutoTuneAlertFilterTool.java | 224 ++
.../tools/CleanupAndRegenerateAnomaliesConfig.java | 88 +
.../tools/CleanupAndRegenerateAnomaliesTool.java | 262 ++
.../pinot/thirdeye/tools/DashboardHttpUtils.java | 71 +
.../tools/FetchAnomaliesInRangeAndOutputCSV.java | 126 +
.../pinot/thirdeye/tools/FetchAutoTuneResult.java | 166 +
.../FetchMetricDataAndExistingAnomaliesTool.java | 228 ++
.../tools/FetchMetricDataInRangeAndOutputCSV.java | 147 +
.../tools/RunAdhocDatabaseQueriesTool.java | 447 +++
.../tools/anomaly/report/AnomalyReportConfig.java | 86 +
.../tools/anomaly/report/AnomalyReportDriver.java | 117 +
.../anomaly/report/GenerateAnomalyReport.java | 348 ++
.../pinot/thirdeye/tracking/RequestLogTest.java | 100 +
.../pinot/thirdeye/util/ThirdEyeUtilsTest.java | 176 +
.../datasource/csv/data-sources-config.yml | 4 -
.../mergeDimensionThresholdProperties.json | 31 -
.../compositePipelineTranslatorTestResult-1.json | 76 -
.../compositePipelineTranslatorTestResult-2.json | 31 -
.../data-sources/data-sources-config-1.yml | 10 +-
.../data-sources/data-sources-config-2.yml | 4 +-
.../apache/pinot}/thirdeye/dataframe/test.csv | 0
.../pinot}/thirdeye/datasource/csv/business.csv | 0
.../datasource/csv/data-sources-config.yml | 4 +
.../thirdeye/detection/algorithm/timeseries-4w.csv | 0
.../mergeDimensionThresholdProperties.json | 31 +
.../thirdeye/detection/integration/timeseries.csv | 0
.../detection/yaml/alertconfig/alert-config-1.yaml | 0
.../detection/yaml/alertconfig/alert-config-2.yaml | 0
.../detection/yaml/alertconfig/alert-config-3.yaml | 0
.../detection/yaml/alertconfig/alert-config-4.yaml | 0
.../compositePipelineTranslatorTestResult-1.json | 76 +
.../compositePipelineTranslatorTestResult-2.json | 31 +
.../thirdeye/detection/yaml/pipeline-config-1.yaml | 0
.../thirdeye/detection/yaml/pipeline-config-2.yaml | 0
.../sample-alertfilter-autotune.properties | 2 +-
.../test/resources/sample-alertfilter.properties | 4 +-
.../test/resources/sample-classifier.properties | 2 +-
.../src/test/resources/sample-functions.properties | 4 +-
2227 files changed, 152975 insertions(+), 152962 deletions(-)
diff --git a/pom.xml b/pom.xml
index 16cb23c..a28205b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1045,6 +1045,7 @@
<exclude>thirdeye/**/*</exclude>
<!-- Documentation files -->
<exclude>**/docs/_build/**</exclude>
+ <exclude>**/?*</exclude>
</excludes>
<mapping>
<thrift>JAVADOC_STYLE</thrift>
diff --git a/thirdeye/pom.xml b/thirdeye/pom.xml
index 7a0a2c8..d8f8080 100644
--- a/thirdeye/pom.xml
+++ b/thirdeye/pom.xml
@@ -20,7 +20,7 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>com.linkedin.thirdeye</groupId>
+ <groupId>org.apache.pinot.thirdeye</groupId>
<artifactId>thirdeye</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
@@ -80,6 +80,7 @@
<kafka.clients.version>0.10.2.1</kafka.clients.version>
<swagger.version>0.7.0</swagger.version>
<calendar-api.version>v3-rev300-1.23.0</calendar-api.version>
+ <json.version>20170516</json.version>
</properties>
<licenses>
@@ -124,7 +125,7 @@
</developers>
<scm>
- <developerConnection>scm:git:git://git@github.com/linkedin/pinot.git</developerConnection>
+ <developerConnection>scm:git:git://git@github.com/apache/incubator-pinot.git</developerConnection>
</scm>
<inceptionYear>2014-2018</inceptionYear>
@@ -158,17 +159,17 @@
<dependencies>
<!-- project dependencies -->
<dependency>
- <groupId>com.linkedin.pinot</groupId>
+ <groupId>org.apache.pinot</groupId>
<artifactId>pinot-api</artifactId>
<version>${pinot.version}</version>
</dependency>
<dependency>
- <groupId>com.linkedin.pinot</groupId>
+ <groupId>org.apache.pinot</groupId>
<artifactId>pinot-common</artifactId>
<version>${pinot.version}</version>
</dependency>
<dependency>
- <groupId>com.linkedin.pinot</groupId>
+ <groupId>org.apache.pinot</groupId>
<artifactId>pinot-core</artifactId>
<version>${pinot.version}</version>
</dependency>
@@ -461,6 +462,12 @@
<artifactId>hsqldb</artifactId>
<version>${hsqldb.version}</version>
</dependency>
+ <dependency>
+ <!-- TODO: replace code dependencies and remove -->
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>${json.version}</version>
+ </dependency>
<!-- swagger -->
<dependency>
diff --git a/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js b/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js
index a2b6bef..4f0657f 100644
--- a/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js
+++ b/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js
@@ -208,8 +208,8 @@ export default Controller.extend({
return;
}
const detectionConfig = {
- className: 'com.linkedin.thirdeye.detection.algorithm.DimensionWrapper', nested: [{
- className: 'com.linkedin.thirdeye.detection.algorithm.MovingWindowAlgorithm',
+ className: 'org.apache.pinot.thirdeye.detection.algorithm.DimensionWrapper', nested: [{
+ className: 'org.apache.pinot.thirdeye.detection.algorithm.MovingWindowAlgorithm',
baselineWeeks: 4,
windowSize: '4 weeks',
changeDuration: '7d',
@@ -242,7 +242,7 @@ export default Controller.extend({
const configResult = {
"cron": "45 10/15 * * * ? *", "name": get(this, 'detectionConfigName'), "lastTimestamp": 0, "properties": {
- "className": "com.linkedin.thirdeye.detection.algorithm.MergeWrapper",
+ "className": "org.apache.pinot.thirdeye.detection.algorithm.MergeWrapper",
"maxGap": 7200000,
"nested": nestedProperties,
"datasetName": get(this, 'datasetName')
diff --git a/thirdeye/thirdeye-frontend/pom.xml b/thirdeye/thirdeye-frontend/pom.xml
index 2068731..c8ac79c 100644
--- a/thirdeye/thirdeye-frontend/pom.xml
+++ b/thirdeye/thirdeye-frontend/pom.xml
@@ -4,11 +4,11 @@
xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>com.linkedin.thirdeye</groupId>
+ <groupId>org.apache.pinot.thirdeye</groupId>
<artifactId>thirdeye</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
- <groupId>com.linkedin.thirdeye</groupId>
+ <groupId>org.apache.pinot.thirdeye</groupId>
<artifactId>thirdeye-frontend</artifactId>
<name>thirdeye-frontend</name>
<url>http://maven.apache.org</url>
diff --git a/thirdeye/thirdeye-hadoop/pom.xml b/thirdeye/thirdeye-hadoop/pom.xml
index 66642a7..1c3561b 100644
--- a/thirdeye/thirdeye-hadoop/pom.xml
+++ b/thirdeye/thirdeye-hadoop/pom.xml
@@ -4,7 +4,7 @@
xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
- <groupId>com.linkedin.thirdeye</groupId>
+ <groupId>org.apache.pinot.thirdeye</groupId>
<artifactId>thirdeye</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
@@ -18,7 +18,7 @@
<dependencies>
<dependency>
- <groupId>com.linkedin.pinot</groupId>
+ <groupId>org.apache.pinot</groupId>
<artifactId>pinot-core</artifactId>
<exclusions>
<exclusion>
@@ -138,7 +138,7 @@
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
- <mainClass>com.linkedin.thirdeye.hadoop.ThirdEyeJob</mainClass>
+ <mainClass>org.apache.pinot.thirdeye.hadoop.ThirdEyeJob</mainClass>
</transformer>
</transformers>
<relocations>
@@ -146,7 +146,7 @@
<pattern>com.</pattern>
<shadedPattern>thirdeye.com.</shadedPattern>
<excludes>
- <exclude>com.linkedin.thirdeye.**</exclude>
+ <exclude>org.apache.pinot.thirdeye.**</exclude>
</excludes>
</relocation>
<relocation>
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJob.java
deleted file mode 100644
index 5f496de..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJob.java
+++ /dev/null
@@ -1,455 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobStatus;
-import org.joda.time.DateTime;
-import org.joda.time.format.ISODateTimeFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob;
-import com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants;
-import com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob;
-import com.linkedin.thirdeye.hadoop.join.JoinPhaseJob;
-import com.linkedin.thirdeye.hadoop.push.SegmentPushPhase;
-import com.linkedin.thirdeye.hadoop.push.SegmentPushPhaseConstants;
-import com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob;
-import com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants;
-import com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob;
-import com.linkedin.thirdeye.hadoop.transform.TransformPhaseJob;
-import com.linkedin.thirdeye.hadoop.wait.WaitPhaseJob;
-
-/**
- * Wrapper to manage segment creation and segment push jobs for thirdeye
- */
-public class ThirdEyeJob {
- private static final Logger LOGGER = LoggerFactory.getLogger(ThirdEyeJob.class);
-
- private static final String USAGE = "usage: phase_name job.properties";
-
- private final String phaseName;
- private final Properties inputConfig;
-
- public ThirdEyeJob(String jobName, Properties config) {
- String phaseFromConfig = config.getProperty(ThirdEyeJobProperties.THIRDEYE_PHASE.getName());
- if (phaseFromConfig != null) {
- this.phaseName = phaseFromConfig;
- } else {
- this.phaseName = jobName;
- }
- this.inputConfig = config;
- }
-
- private enum PhaseSpec {
-
- BACKFILL {
- @Override
- Class<?> getKlazz() {
- return BackfillPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Backfills older pinot segments with star tree index and topk information";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- Properties config = new Properties();
-
- config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_CONTROLLER_HOST.toString(),
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName()));
- config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_CONTROLLER_PORT.toString(),
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName()));
-
- config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_START_TIME.toString(),
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_BACKFILL_START_TIME.getName()));
- config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_END_TIME.toString(),
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_BACKFILL_END_TIME.getName()));
-
- config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_OUTPUT_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator + BACKFILL.getName());
- config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_TABLE_NAME.toString(), collection);
-
- return config;
- }
- },
- WAIT {
- @Override
- Class<?> getKlazz() {
- return null;
- }
-
- @Override
- String getDescription() {
- return "Polls a pre-determined amount of time for the existence of input paths";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- return null;
- }
- },
- JOIN {
- @Override
- Class<?> getKlazz() {
- return JoinPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Joins multiple data sets based on join key";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths) {
- return inputConfig;
- }
- },
- TRANSFORM {
- @Override
- Class<?> getKlazz() {
- return TransformPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Transforms avro record";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths) {
- return inputConfig;
- }
- },
- AGGREGATION {
- @Override
- Class<?> getKlazz() {
- return AggregationPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Aggregates input avro data to another time granularity";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- Properties config = new Properties();
-
- config.setProperty(AggregationPhaseConstants.AGG_PHASE_INPUT_PATH.toString(),
- inputPaths);
- config.setProperty(AggregationPhaseConstants.AGG_PHASE_OUTPUT_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator
- + AGGREGATION.getName());
-
- return config;
- }
- },
- TOPK {
- @Override
- Class<?> getKlazz() {
- return TopKPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Topk";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- Properties config = new Properties();
-
- Path aggOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName());
- FileSystem fs = FileSystem.get(new Configuration());
- if (fs.exists(aggOutputPath)) {
- inputPaths = aggOutputPath.toString();
- }
- config.setProperty(TopKPhaseConstants.TOPK_PHASE_INPUT_PATH.toString(),
- inputPaths);
- config.setProperty(TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator
- + TOPK.getName());
-
- return config;
- }
- },
- DERIVED_COLUMN_TRANSFORMATION {
- @Override
- Class<?> getKlazz() {
- return DerivedColumnTransformationPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Adds new columns for dimensions with topk or whitelist";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- Properties config = new Properties();
-
- Path aggOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName());
- FileSystem fs = FileSystem.get(new Configuration());
- if (fs.exists(aggOutputPath)) {
- inputPaths = aggOutputPath.toString();
- }
- config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH.toString(),
- inputPaths);
- config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
- getIndexDir(root, collection, minTime, maxTime));
- config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator
- + DERIVED_COLUMN_TRANSFORMATION.getName());
- config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator + TOPK.getName());
-
- return config;
- }
- },
- SEGMENT_CREATION {
- @Override
- Class<?> getKlazz() {
- return SegmentCreationPhaseJob.class;
- }
-
- @Override
- String getDescription() {
- return "Generates pinot segments";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- Properties config = new Properties();
-
- Path derivedOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator +
- DERIVED_COLUMN_TRANSFORMATION.getName());
- Path aggregationOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator +
- AGGREGATION.getName());
- FileSystem fs = FileSystem.get(new Configuration());
- if (fs.exists(derivedOutputPath)) {
- inputPaths = derivedOutputPath.toString();
- } else if (fs.exists(aggregationOutputPath)) {
- inputPaths = aggregationOutputPath.toString();
- }
-
- config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_INPUT_PATH.toString(), inputPaths);
- config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_OUTPUT_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator + SEGMENT_CREATION.getName());
- config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_START_TIME.toString(),
- String.valueOf(minTime.getMillis()));
- config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_END_TIME.toString(),
- String.valueOf(maxTime.getMillis()));
-
- String schedule = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_FLOW_SCHEDULE.getName());
- config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_SCHEDULE.toString(), schedule);
- return config;
- }
- },
- SEGMENT_PUSH {
- @Override
- Class<?> getKlazz() {
- return SegmentPushPhase.class;
- }
-
- @Override
- String getDescription() {
- return "Pushes pinot segments to pinot controller";
- }
-
- @Override
- Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths)
- throws Exception {
- Properties config = new Properties();
-
- config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_INPUT_PATH.toString(),
- getIndexDir(root, collection, minTime, maxTime) + File.separator + SEGMENT_CREATION.getName());
- config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_HOSTS.toString(),
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName()));
- config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_PORT.toString(),
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName()));
- return config;
- }
- };
-
- abstract Class<?> getKlazz();
-
- abstract String getDescription();
-
- abstract Properties getJobProperties(Properties inputConfig, String root, String collection,
- DateTime minTime, DateTime maxTime, String inputPaths) throws Exception;
-
- String getName() {
- return this.name().toLowerCase();
- }
-
- String getIndexDir(String root, String collection, DateTime minTime,
- DateTime maxTime) throws IOException {
- return getCollectionDir(root, collection) + File.separator
- + "data_" + ThirdEyeConstants.DATE_TIME_FORMATTER.print(minTime) + "_"
- + ThirdEyeConstants.DATE_TIME_FORMATTER.print(maxTime);
- }
-
- }
-
- private static void usage() {
- System.err.println(USAGE);
- for (PhaseSpec phase : PhaseSpec.values()) {
- System.err.printf("%-30s : %s\n", phase.getName(), phase.getDescription());
- }
- }
-
- private static String getAndCheck(String name, Properties properties) {
- String value = properties.getProperty(name);
- if (value == null) {
- throw new IllegalArgumentException("Must provide " + name);
- }
- return value;
- }
-
-
- private static String getCollectionDir(String root, String collection) {
- return root == null ? collection : root + File.separator + collection;
- }
-
- private void setMapreduceConfig(Configuration configuration) {
- String mapreduceConfig =
- inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_MR_CONF.getName());
- if (mapreduceConfig != null && !mapreduceConfig.isEmpty()) {
- String[] options = mapreduceConfig.split(",");
- for (String option : options) {
- String[] configs = option.split("=", 2);
- if (configs.length == 2) {
- LOGGER.info("Setting job configuration {} to {}", configs[0], configs[1]);
- configuration.set(configs[0], configs[1]);
- }
- }
- }
- }
-
- @SuppressWarnings("unchecked")
- public void run() throws Exception {
- LOGGER.info("Input config:{}", inputConfig);
- PhaseSpec phaseSpec;
- try {
- phaseSpec = PhaseSpec.valueOf(phaseName.toUpperCase());
- } catch (Exception e) {
- usage();
- throw e;
- }
-
- if (PhaseSpec.TRANSFORM.equals(phaseSpec)) {
- TransformPhaseJob job = new TransformPhaseJob("Transform Job", inputConfig);
- job.run();
- return;
-
- } else if (PhaseSpec.JOIN.equals(phaseSpec)) {
- JoinPhaseJob job = new JoinPhaseJob("Join Job", inputConfig);
- job.run();
- return;
-
- } else if (PhaseSpec.WAIT.equals(phaseSpec)) {
- WaitPhaseJob job = new WaitPhaseJob("Wait for inputs", inputConfig);
- job.run();
- return;
- }
-
- // Get root, collection, input paths
- String root = getAndCheck(ThirdEyeJobProperties.THIRDEYE_ROOT.getName(), inputConfig);
- String collection =
- getAndCheck(ThirdEyeJobProperties.THIRDEYE_COLLECTION.getName(), inputConfig);
- String inputPaths = getAndCheck(ThirdEyeJobProperties.INPUT_PATHS.getName(), inputConfig);
-
- // Get min / max time
- DateTime minTime;
- DateTime maxTime;
-
- String minTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MIN.getName());
- String maxTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MAX.getName());
-
- minTime = ISODateTimeFormat.dateTimeParser().parseDateTime(minTimeProp);
- maxTime = ISODateTimeFormat.dateTimeParser().parseDateTime(maxTimeProp);
-
- Properties jobProperties = phaseSpec.getJobProperties(inputConfig, root, collection,
- minTime, maxTime, inputPaths);
- for (Object key : inputConfig.keySet()) {
- jobProperties.setProperty(key.toString(), inputConfig.getProperty(key.toString()));
- }
-
- // Instantiate the job
- Constructor<Configured> constructor = (Constructor<Configured>) phaseSpec.getKlazz()
- .getConstructor(String.class, Properties.class);
- Configured instance = constructor.newInstance(phaseSpec.getName(), jobProperties);
- setMapreduceConfig(instance.getConf());
-
- // Run the job
- Method runMethod = instance.getClass().getMethod("run");
- Job job = (Job) runMethod.invoke(instance);
- if (job != null) {
- JobStatus status = job.getStatus();
- if (status.getState() != JobStatus.State.SUCCEEDED) {
- throw new RuntimeException(
- "Job " + job.getJobName() + " failed to execute: Ran with config:" + jobProperties);
- }
- }
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 2) {
- usage();
- System.exit(1);
- }
-
- String phaseName = args[0];
- Properties config = new Properties();
- config.load(new FileInputStream(args[1]));
- new ThirdEyeJob(phaseName, config).run();
- }
-}
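For orientation while reading the deletions that follow: the driver above was launched with a phase name and a job.properties file (see its USAGE string). A minimal, hypothetical sketch of driving the aggregation phase programmatically, using property names from ThirdEyeJobProperties just below; paths and values are invented, and a real run would also need the ThirdEyeConfig properties (dimension/metric names), omitted here:

    import java.util.Properties;
    import com.linkedin.thirdeye.hadoop.ThirdEyeJob;  // package name as in the deleted file above

    public class RunAggregationPhase {
      public static void main(String[] args) throws Exception {
        Properties config = new Properties();
        config.setProperty("thirdeye.root", "/data/thirdeye");       // hypothetical HDFS root
        config.setProperty("thirdeye.collection", "pageViews");      // hypothetical collection name
        config.setProperty("input.paths", "/data/input/pageViews");  // hypothetical input path
        config.setProperty("thirdeye.time.min", "2018-01-01T00");    // ISO-8601, parsed by dateTimeParser()
        config.setProperty("thirdeye.time.max", "2018-01-02T00");
        new ThirdEyeJob("aggregation", config).run();                // resolves to PhaseSpec.AGGREGATION
      }
    }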
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJobProperties.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJobProperties.java
deleted file mode 100644
index a002c4a..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJobProperties.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop;
-
-public enum ThirdEyeJobProperties {
- THIRDEYE_FLOW_SCHEDULE("thirdeye.flow.schedule"), // HOURLY, DAILY
- THIRDEYE_PHASE("thirdeye.phase"), // segment_creation, segment_push
- THIRDEYE_ROOT("thirdeye.root"),
- THIRDEYE_COLLECTION("thirdeye.collection"),
- THIRDEYE_TIME_MIN("thirdeye.time.min"), // YYYY-mm-ddThh
- THIRDEYE_TIME_MAX("thirdeye.time.max"),
- INPUT_PATHS("input.paths"),
- THIRDEYE_MR_CONF("thirdeye.mr.conf"),
- THIRDEYE_PINOT_CONTROLLER_HOSTS("thirdeye.pinot.controller.hosts"),
- THIRDEYE_PINOT_CONTROLLER_PORT("thirdeye.pinot.controller.port"),
- THIRDEYE_BACKFILL_START_TIME("thirdeye.backfill.start.time"),
- THIRDEYE_BACKFILL_END_TIME("thirdeye.backfill.end.time"),
- THIRDEYE_NUM_REDUCERS("thirdeye.num.reducers");
-
- private final String propertyName;
-
- ThirdEyeJobProperties(String propertyName) {
- this.propertyName = propertyName;
- }
-
- public String getName() {
- return propertyName;
- }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java
deleted file mode 100644
index aa92df9..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.TimeSpec;
-
-/**
- * This class contains the config needed by the aggregation phase
- * and the methods to obtain it from the ThirdEyeConfig
- */
-public class AggregationPhaseConfig {
- private List<String> dimensionNames;
- private List<DimensionType> dimensionTypes;
- private List<String> metricNames;
- private List<MetricType> metricTypes;
- private TimeSpec time;
- private TimeSpec inputTime;
-
- public AggregationPhaseConfig() {
-
- }
-
- public AggregationPhaseConfig(List<String> dimensionNames, List<String> metricNames,
- List<DimensionType> dimensionTypes, List<MetricType> metricTypes, TimeSpec time, TimeSpec inputTime) {
- super();
- this.dimensionNames = dimensionNames;
- this.dimensionTypes = dimensionTypes;
- this.metricNames = metricNames;
- this.metricTypes = metricTypes;
- this.time = time;
- this.inputTime = inputTime;
- }
-
- public List<String> getDimensionNames() {
- return dimensionNames;
- }
-
- public List<DimensionType> getDimensionTypes() {
- return dimensionTypes;
- }
-
- public List<String> getMetricNames() {
- return metricNames;
- }
-
- public List<MetricType> getMetricTypes() {
- return metricTypes;
- }
-
- public TimeSpec getTime() {
- return time;
- }
-
- public TimeSpec getInputTime() {
- return inputTime;
- }
-
- public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
-
- // metrics
- List<String> metricNames = new ArrayList<>(config.getMetrics().size());
- List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
- for (MetricSpec spec : config.getMetrics()) {
- metricNames.add(spec.getName());
- metricTypes.add(spec.getType());
- }
-
- // dimensions
- List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
- List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
- for (DimensionSpec spec : config.getDimensions()) {
- dimensionNames.add(spec.getName());
- dimensionTypes.add(spec.getDimensionType());
- }
-
- // time
- TimeSpec time = config.getTime();
-
- // input time
- TimeSpec inputTime = config.getInputTime();
- if (inputTime == null) {
- throw new IllegalStateException("Must provide input time configs for aggregation job");
- }
-
- return new AggregationPhaseConfig(dimensionNames, metricNames, dimensionTypes, metricTypes, time, inputTime);
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java
deleted file mode 100644
index 58de4e7..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-/**
- * This class contains the properties to be set for the aggregation phase
- */
-public enum AggregationPhaseConstants {
-
- AGG_PHASE_INPUT_PATH("aggregation.phase.input.path"),
- AGG_PHASE_AVRO_SCHEMA("aggregation.phase.avro.schema"),
- AGG_PHASE_OUTPUT_PATH("aggregation.phase.output.path"),
- AGG_PHASE_THIRDEYE_CONFIG("aggregation.phase.thirdeye.config");
-
- String name;
-
- AggregationPhaseConstants(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseJob.java
deleted file mode 100644
index 302d19b..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseJob.java
+++ /dev/null
@@ -1,360 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.thirdeye.hadoop.ThirdEyeJobProperties;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.config.TimeGranularity;
-import com.linkedin.thirdeye.hadoop.config.TimeSpec;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-import static com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseConstants.*;
-
-/**
- * Buckets input avro data according to the granularity specified in the config and aggregates metrics.
- * Mapper:
- * Converts the time column into the bucket granularity
- * Reducer:
- * Aggregates all records with the same dimensions within one time bucket
- */
-public class AggregationPhaseJob extends Configured {
- private static final Logger LOGGER = LoggerFactory.getLogger(AggregationPhaseJob.class);
-
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- private String name;
- private Properties props;
-
- public AggregationPhaseJob(String name, Properties props) {
- super(new Configuration());
- this.name = name;
- this.props = props;
- }
-
- public static class AggregationMapper extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
-
- private ThirdEyeConfig thirdeyeConfig;
- private AggregationPhaseConfig config;
- private List<String> dimensionNames;
- private List<DimensionType> dimensionTypes;
- private List<String> metricNames;
- List<MetricType> metricTypes;
- private int numMetrics;
- private String timeColumnName;
- private TimeGranularity inputGranularity;
- private TimeGranularity aggregateGranularity;
- private BytesWritable keyWritable;
- private BytesWritable valWritable;
- private int numRecords;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
- LOGGER.info("AggregationPhaseJob.AggregationPhaseMapper.setup()");
- Configuration configuration = context.getConfiguration();
-
- thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(AGG_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
- dimensionNames = config.getDimensionNames();
- dimensionTypes = config.getDimensionTypes();
- metricNames = config.getMetricNames();
- numMetrics = metricNames.size();
- metricTypes = config.getMetricTypes();
- timeColumnName = config.getTime().getColumnName();
- inputGranularity = config.getInputTime().getTimeGranularity();
- aggregateGranularity = config.getTime().getTimeGranularity();
- keyWritable = new BytesWritable();
- valWritable = new BytesWritable();
- numRecords = 0;
- }
-
- @Override
- public void map(AvroKey<GenericRecord> record, NullWritable value, Context context) throws IOException, InterruptedException {
-
- // input record
- GenericRecord inputRecord = record.datum();
-
- // dimensions
- List<Object> dimensions = new ArrayList<>();
- for (String dimension : dimensionNames) {
- Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimension);
- dimensions.add(dimensionValue);
- }
-
- // metrics
- Number[] metrics = new Number[numMetrics];
- for (int i = 0; i < numMetrics; i++) {
- Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricNames.get(i), metricTypes.get(i));
- metrics[i] = metricValue;
- }
-
- // time
- long timeValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName).longValue();
- long inputTimeMillis = inputGranularity.toMillis(timeValue);
- long bucketTime = aggregateGranularity.convertToUnit(inputTimeMillis);
-
- AggregationPhaseMapOutputKey keyWrapper = new AggregationPhaseMapOutputKey(bucketTime, dimensions, dimensionTypes);
- byte[] keyBytes = keyWrapper.toBytes();
- keyWritable.set(keyBytes, 0, keyBytes.length);
-
- AggregationPhaseMapOutputValue valWrapper = new AggregationPhaseMapOutputValue(metrics, metricTypes);
- byte[] valBytes = valWrapper.toBytes();
- valWritable.set(valBytes, 0, valBytes.length);
-
- numRecords ++;
- context.write(keyWritable, valWritable);
- }
-
- @Override
- public void cleanup(Context context) throws IOException, InterruptedException {
- context.getCounter(AggregationCounter.NUMBER_OF_RECORDS).increment(numRecords);
- }
- }
-
- public static class AggregationReducer
- extends Reducer<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> {
-
- private Schema avroSchema;
- private ThirdEyeConfig thirdeyeConfig;
- private AggregationPhaseConfig config;
- private List<String> dimensionsNames;
- private List<DimensionType> dimensionTypes;
- private List<String> metricNames;
- List<MetricType> metricTypes;
- private int numMetrics;
- private TimeSpec time;
- private int numRecords;
- private Number[] metricSums;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
- LOGGER.info("AggregationPhaseJob.AggregationPhaseReducer.setup()");
- Configuration configuration = context.getConfiguration();
-
- thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(AGG_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
- dimensionsNames = config.getDimensionNames();
- dimensionTypes = config.getDimensionTypes();
- metricNames = config.getMetricNames();
- numMetrics = metricNames.size();
- metricTypes = config.getMetricTypes();
- time = config.getTime();
- avroSchema = new Schema.Parser().parse(configuration.get(AGG_PHASE_AVRO_SCHEMA.toString()));
- numRecords = 0;
- metricSums = new Number[numMetrics];
- Arrays.fill(metricSums, 0);
- }
-
- @Override
- public void reduce(BytesWritable aggregationKey, Iterable<BytesWritable> values,
- Context context) throws IOException, InterruptedException {
-
- // output record
- GenericRecord outputRecord = new Record(avroSchema);
-
- AggregationPhaseMapOutputKey keyWrapper = AggregationPhaseMapOutputKey.fromBytes(aggregationKey.getBytes(), dimensionTypes);
-
- // time
- long timeValue = keyWrapper.getTime();
- outputRecord.put(time.getColumnName(), timeValue);
-
- // dimensions
- List<Object> dimensionValues = keyWrapper.getDimensionValues();
- for (int i = 0; i < dimensionsNames.size(); i++) {
- String dimensionName = dimensionsNames.get(i);
- Object dimensionValue = dimensionValues.get(i);
- outputRecord.put(dimensionName, dimensionValue);
- }
-
- // aggregate metrics
- Number[] aggMetricValues = new Number[numMetrics];
- Arrays.fill(aggMetricValues, 0);
- for (BytesWritable value : values) {
- Number[] metricValues = AggregationPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes).getMetricValues();
- ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
- }
- ThirdeyeAggregateMetricUtils.aggregate(metricTypes, metricSums, aggMetricValues);
-
- // metrics
- for (int i = 0; i < numMetrics; i++) {
- String metricName = metricNames.get(i);
- Number metricValue = aggMetricValues[i];
- outputRecord.put(metricName, metricValue);
- }
-
- numRecords ++;
- AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
- context.write(outputKey, NullWritable.get());
- }
-
- @Override
- public void cleanup(Context context) throws IOException, InterruptedException {
- context.getCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED).increment(numRecords);
- for (int i = 0; i < numMetrics; i++) {
- context.getCounter(thirdeyeConfig.getCollection(), metricNames.get(i)).increment(metricSums[i].longValue());
- }
- }
- }
-
- public Job run() throws Exception {
- Job job = Job.getInstance(getConf());
- job.setJobName(name);
- job.setJarByClass(AggregationPhaseJob.class);
-
- FileSystem fs = FileSystem.get(getConf());
- Configuration configuration = job.getConfiguration();
-
- // Properties
- LOGGER.info("Properties {}", props);
-
- // Input Path
- String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
- LOGGER.info("Input path dir: " + inputPathDir);
- for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
- LOGGER.info("Adding input:" + inputPath);
- Path input = new Path(inputPath);
- FileInputFormat.addInputPath(job, input);
- }
-
- // Output path
- Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
- LOGGER.info("Output path dir: " + outputPath.toString());
- if (fs.exists(outputPath)) {
- fs.delete(outputPath, true);
- }
- FileOutputFormat.setOutputPath(job, outputPath);
-
- // Schema
- Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
- LOGGER.info("Schema : {}", avroSchema.toString(true));
- job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());
-
- // ThirdEyeConfig
- String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
- String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
- ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
- LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
- job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
- // Map config
- job.setMapperClass(AggregationMapper.class);
- job.setInputFormatClass(AvroKeyInputFormat.class);
- job.setMapOutputKeyClass(BytesWritable.class);
- job.setMapOutputValueClass(BytesWritable.class);
-
- // Reduce config
- job.setReducerClass(AggregationReducer.class);
- job.setOutputKeyClass(AvroKey.class);
- job.setOutputValueClass(NullWritable.class);
- AvroJob.setOutputKeySchema(job, avroSchema);
- job.setOutputFormatClass(AvroKeyOutputFormat.class);
- String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
- LOGGER.info("Num Reducers : {}", numReducers);
- if (StringUtils.isNotBlank(numReducers)) {
- job.setNumReduceTasks(Integer.valueOf(numReducers));
- LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
- }
-
- job.waitForCompletion(true);
-
- Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
- LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
- if (counter.getValue() == 0) {
- throw new IllegalStateException("No input records in " + inputPathDir);
- }
- counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
- LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
-
- for (String metric : thirdeyeConfig.getMetricNames()) {
- counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
- LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
- }
-
- return job;
- }
-
- private String getAndSetConfiguration(Configuration configuration,
- AggregationPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
- throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- public static enum AggregationCounter {
- NUMBER_OF_RECORDS,
- NUMBER_OF_RECORDS_FLATTENED
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 1) {
- throw new IllegalArgumentException("usage: config.properties");
- }
-
- Properties props = new Properties();
- props.load(new FileInputStream(args[0]));
-
- AggregationPhaseJob job = new AggregationPhaseJob("aggregate_avro_job", props);
- job.run();
- }
-}
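To make the mapper's time handling above concrete: the raw time value is first widened to millis at the input granularity, then reduced to a bucket index at the aggregate granularity. A hypothetical worked example, assuming toMillis multiplies by the unit size in millis and convertToUnit divides by it (TimeGranularity itself is deleted elsewhere in this commit, so these semantics are inferred):

    public class TimeBucketSketch {
      public static void main(String[] args) {
        // Hypothetical: hourly input rolled up into daily buckets.
        long rawHours = 49L;                    // value of the time column, in hours
        long millis = rawHours * 3_600_000L;    // inputGranularity.toMillis(49)  -> 176,400,000
        long dayBucket = millis / 86_400_000L;  // aggregateGranularity.convertToUnit(...) -> 2
        System.out.println(dayBucket);          // the record lands in day bucket 2 (integer division)
      }
    }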
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java
deleted file mode 100644
index ecc0bdd..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-
-/**
- * Wrapper for the key generated by the mapper in the aggregation phase
- */
-public class AggregationPhaseMapOutputKey {
-
- private long time;
- private List<Object> dimensionValues;
- private List<DimensionType> dimensionTypes;
-
- public AggregationPhaseMapOutputKey(long time, List<Object> dimensionValues, List<DimensionType> dimensionTypes) {
- this.time = time;
- this.dimensionValues = dimensionValues;
- this.dimensionTypes = dimensionTypes;
- }
-
- public long getTime() {
- return time;
- }
-
- public List<Object> getDimensionValues() {
- return dimensionValues;
- }
-
- public List<DimensionType> getDimensionTypes() {
- return dimensionTypes;
- }
-
- /**
- * Converts this AggregationPhaseMapOutputKey to a byte array
- * @return serialized byte array
- * @throws IOException
- */
- public byte[] toBytes() throws IOException {
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
-
- // time
- dos.writeLong(time);
-
- // dimensions size
- dos.writeInt(dimensionValues.size());
- // dimension values
- for (int i = 0; i < dimensionValues.size(); i++) {
- Object dimensionValue = dimensionValues.get(i);
- DimensionType dimensionType = dimensionTypes.get(i);
- DimensionType.writeDimensionValueToOutputStream(dos, dimensionValue, dimensionType);
- }
-
- baos.close();
- dos.close();
- return baos.toByteArray();
- }
-
- /**
- * Constructs an AggregationPhaseMapOutputKey from a byte array
- * @param buffer serialized key bytes
- * @param dimensionTypes types used to decode each dimension value
- * @return the decoded key
- * @throws IOException
- */
- public static AggregationPhaseMapOutputKey fromBytes(byte[] buffer, List<DimensionType> dimensionTypes) throws IOException {
- DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
-
- // time
- long time = dis.readLong();
-
- // dimensions size
- int size = dis.readInt();
-
- // dimension value
- List<Object> dimensionValues = new ArrayList<>();
- for (int i = 0; i < size; i++) {
- DimensionType dimensionType = dimensionTypes.get(i);
- Object dimensionValue = DimensionType.readDimensionValueFromDataInputStream(dis, dimensionType);
- dimensionValues.add(dimensionValue);
- }
-
- return new AggregationPhaseMapOutputKey(time, dimensionValues, dimensionTypes);
- }
-
-}
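The byte layout written above is a long timestamp, an int dimension count, then each dimension value in its DimensionType encoding; fromBytes reads the same fields back in order. A minimal round-trip sketch, assuming DimensionType defines STRING and LONG constants (its enum body is not shown in this excerpt):

    import java.util.Arrays;
    import java.util.List;
    import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseMapOutputKey;
    import com.linkedin.thirdeye.hadoop.config.DimensionType;

    public class KeyRoundTripSketch {
      public static void main(String[] args) throws Exception {
        List<DimensionType> types = Arrays.asList(DimensionType.STRING, DimensionType.LONG);
        List<Object> values = Arrays.<Object>asList("US", 42L);  // hypothetical dimension values
        byte[] bytes = new AggregationPhaseMapOutputKey(3600L, values, types).toBytes();
        AggregationPhaseMapOutputKey decoded = AggregationPhaseMapOutputKey.fromBytes(bytes, types);
        System.out.println(decoded.getTime());                   // 3600
      }
    }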
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java
deleted file mode 100644
index 2493907..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-
-/**
- * Wrapper for the map output value generated by the mapper in the aggregation phase
- */
-public class AggregationPhaseMapOutputValue {
-
- private final Number[] metricValues;
- private final List<MetricType> metricTypes;
-
- public AggregationPhaseMapOutputValue(Number[] metricValues, List<MetricType> metricTypes) {
- this.metricValues = metricValues;
- this.metricTypes = metricTypes;
- }
-
- public Number[] getMetricValues() {
- return metricValues;
- }
-
- /**
- * Serializes this AggregationPhaseMapOutputValue to a byte array
- * @return the serialized value bytes
- * @throws IOException if writing to the underlying stream fails
- */
- public byte[] toBytes() throws IOException {
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
-
- // metric values
- dos.writeInt(metricValues.length);
- for (int i = 0; i < metricValues.length; i++) {
- Number number = metricValues[i];
- MetricType metricType = metricTypes.get(i);
- MetricType.writeMetricValueToDataOutputStream(dos, number, metricType);
- }
-
- dos.close();
- return baos.toByteArray();
- }
-
- /**
- * Reconstructs an AggregationPhaseMapOutputValue from a byte array
- * @param buffer the serialized value bytes
- * @param metricTypes the metric types, in the same order used for serialization
- * @return the reconstructed value
- * @throws IOException if reading from the buffer fails
- */
- public static AggregationPhaseMapOutputValue fromBytes(byte[] buffer, List<MetricType> metricTypes) throws IOException {
- DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
-
- // metric values
- int length = dis.readInt();
- Number[] metricValues = new Number[length];
-
- for (int i = 0 ; i < length; i++) {
- MetricType metricType = metricTypes.get(i);
- Number metricValue = MetricType.readMetricValueFromDataInputStream(dis, metricType);
- metricValues[i] = metricValue;
- }
-
- return new AggregationPhaseMapOutputValue(metricValues, metricTypes);
- }
-
-}
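The value side follows the same pattern: an int metric count followed by per-metric payloads, so identical MetricType lists are needed for encode and decode. A round-trip sketch with illustrative metrics (same assumptions as the key example above):

    Number[] metricValues = new Number[] { 10, 2.5d };
    List<MetricType> types = Arrays.asList(MetricType.INT, MetricType.DOUBLE);
    AggregationPhaseMapOutputValue value = new AggregationPhaseMapOutputValue(metricValues, types);

    byte[] bytes = value.toBytes();
    AggregationPhaseMapOutputValue decoded = AggregationPhaseMapOutputValue.fromBytes(bytes, types);
    // decoded.getMetricValues() again holds { 10, 2.5 }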
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillControllerAPIs.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillControllerAPIs.java
deleted file mode 100644
index 469a830..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillControllerAPIs.java
+++ /dev/null
@@ -1,251 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.compress.archivers.ArchiveException;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.util.EntityUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.io.Files;
-import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * Contains APIs used for backfilling Pinot segments with star-tree index
- */
-public class BackfillControllerAPIs {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(BackfillControllerAPIs.class);
- private HttpHost controllerHttpHost;
- private String tableName;
-
- private static final String SEGMENTS_ENDPOINT = "segments/";
- private static final String TABLES_ENDPOINT = "tables/";
- private static final String METADATA_ENDPOINT = "metadata";
- private static final String UTF_8 = "UTF-8";
-
- private static final String SEGMENT_NAME = "segment.name";
- private static final String SEGMENT_TABLE_NAME = "segment.table.name";
- private static final String SEGMENT_END_TIME = "segment.end.time";
- private static final String SEGMENT_START_TIME = "segment.start.time";
- private static final String SEGMENT_TIME_UNIT = "segment.time.unit";
-
- BackfillControllerAPIs(String controllerHost, int controllerPort, String tableName) {
- this.tableName = tableName;
- LOGGER.info("Connecting to {} {} table {}", controllerHost, controllerPort, tableName);
- controllerHttpHost = new HttpHost(controllerHost, controllerPort);
- }
-
- /**
- * Downloads a segment from the controller and copies it into HDFS
- * @param segmentName name of the segment to download
- * @param hdfsSegmentPath HDFS directory into which the segment is copied
- * @throws IOException if the download or the HDFS copy fails
- * @throws ArchiveException if the downloaded segment tar cannot be extracted
- */
- public void downloadSegment(String segmentName, Path hdfsSegmentPath)
- throws IOException, ArchiveException {
-
- FileSystem fs = FileSystem.get(new Configuration());
- HttpClient controllerClient = new DefaultHttpClient();
- HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
- + "/" + URLEncoder.encode(segmentName, UTF_8));
- HttpResponse res = controllerClient.execute(controllerHttpHost, req);
- try {
- if (res.getStatusLine().getStatusCode() != 200) {
- throw new IllegalStateException(res.getStatusLine().toString());
- }
- LOGGER.info("Fetching segment {}", segmentName);
- InputStream content = res.getEntity().getContent();
-
- File tempDir = new File(Files.createTempDir(), "thirdeye_temp");
- tempDir.mkdir();
- LOGGER.info("Creating temporary dir for staging segments {}", tempDir);
- File tempSegmentDir = new File(tempDir, segmentName);
- File tempSegmentTar = new File(tempDir, segmentName + ThirdEyeConstants.TAR_SUFFIX);
-
- LOGGER.info("Downloading {} to {}", segmentName, tempSegmentTar);
- OutputStream out = new FileOutputStream(tempSegmentTar);
- IOUtils.copy(content, out);
- out.close();
- if (!tempSegmentTar.exists()) {
- throw new IllegalStateException("Download of " + segmentName + " unsuccessful");
- }
-
- LOGGER.info("Extracting segment {} to {}", tempSegmentTar, tempDir);
- TarGzCompressionUtils.unTar(tempSegmentTar, tempDir);
- File[] files = tempDir.listFiles(new FilenameFilter() {
-
- @Override
- public boolean accept(File dir, String name) {
- return !name.endsWith(ThirdEyeConstants.TAR_SUFFIX) && new File(dir, name).isDirectory();
- }
- });
- if (files.length == 0) {
- throw new IllegalStateException("Failed to extract " + tempSegmentTar + " to " + tempDir);
- } else if (!files[0].getName().equals(tempSegmentDir.getName())){
- LOGGER.info("Moving extracted segment to the segment dir {}", tempSegmentDir);
- FileUtils.moveDirectory(files[0], tempSegmentDir);
- }
- if (!tempSegmentDir.exists()) {
- throw new IllegalStateException("Failed to move " + files[0] + " to " + tempSegmentDir);
- }
-
- LOGGER.info("Copying segment from {} to hdfs {}", tempSegmentDir, hdfsSegmentPath);
- fs.copyFromLocalFile(new Path(tempSegmentDir.toString()), hdfsSegmentPath);
- Path hdfsSegmentDir = new Path(hdfsSegmentPath, segmentName);
- if (!fs.exists(hdfsSegmentDir)) {
- throw new IllegalStateException("Failed to copy segment " + segmentName + " from local path " + tempSegmentDir
- + " to hdfs path " + hdfsSegmentPath);
- }
- } finally {
- if (res.getEntity() != null) {
- EntityUtils.consume(res.getEntity());
- }
- }
- LOGGER.info("Successfully downloaded segment {} to {}", segmentName, hdfsSegmentPath);
- }
-
- /**
- * Given a time range and list of all segments for a table, returns all segments which are in the time range
- * @param tableName name of the table the segments belong to
- * @param allSegments all segment names for the table
- * @param startTime start of the time range, in the time unit of the segment metadata
- * @param endTime end of the time range, in the time unit of the segment metadata
- * @return the segment names whose start or end time falls within the range
- * @throws Exception if the segment metadata cannot be fetched
- */
- public List<String> findSegmentsInRange(String tableName, List<String> allSegments, long startTime, long endTime)
- throws Exception {
- List<String> segmentsInRange = new ArrayList<>();
- for (String segmentName : allSegments) {
- Map<String, String> metadata = getSegmentMetadata(tableName, segmentName);
- long segmentStartTime = Long.valueOf(metadata.get(SEGMENT_START_TIME));
- long segmentEndTime = Long.valueOf(metadata.get(SEGMENT_END_TIME));
- String segmentTableName = metadata.get(SEGMENT_TABLE_NAME);
-
- // TODO:
- // Using time value directly for now, as we only have time unit and not time size in metadata
- // Once we have time size in metadata, we can accept the time in millis and then convert time from metadata accordingly
- if (segmentTableName.equals(tableName) && ((segmentStartTime >= startTime && segmentStartTime <= endTime)
- || (segmentEndTime >= startTime && segmentEndTime <= endTime))) {
- LOGGER.info("Segment name : {}, Segment start : {}, Segment end : {}, Segment table : {}",
- segmentName, segmentStartTime, segmentEndTime, segmentTableName);
- segmentsInRange.add(segmentName);
- }
- }
- return segmentsInRange;
- }
-
- /**
- * Fetches the list of all segment names for a table
- * @param tableName name of the table
- * @return names of all segments in the table
- * @throws IOException if the controller request fails
- */
- public List<String> getAllSegments(String tableName) throws IOException {
- List<String> allSegments = new ArrayList<>();
-
- HttpClient controllerClient = new DefaultHttpClient();
- HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8));
- HttpResponse res = controllerClient.execute(controllerHttpHost, req);
- try {
- if (res.getStatusLine().getStatusCode() != 200) {
- throw new IllegalStateException(res.getStatusLine().toString());
- }
- InputStream content = res.getEntity().getContent();
- String response = IOUtils.toString(content);
- List<String> allSegmentsPaths = getSegmentsFromResponse(response);
- for (String segment : allSegmentsPaths) {
- allSegments.add(segment.substring(segment.lastIndexOf("/") + 1));
- }
- LOGGER.info("All segments : {}", allSegments);
- } finally {
- if (res.getEntity() != null) {
- EntityUtils.consume(res.getEntity());
- }
- }
- return allSegments;
- }
-
- /**
- * Returns the metadata of a segment, given the segment name and table name
- * @param tableName - table where segment resides
- * @param segmentName - name of the segment
- * @return map of metadata property names to values
- * @throws IOException if the controller request fails
- */
- public Map<String, String> getSegmentMetadata(String tableName, String segmentName) throws IOException {
- Map<String, String> metadata = null;
- HttpClient controllerClient = new DefaultHttpClient();
- HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
- + "/" + SEGMENTS_ENDPOINT + URLEncoder.encode(segmentName, UTF_8) + "/" + METADATA_ENDPOINT);
- HttpResponse res = controllerClient.execute(controllerHttpHost, req);
- try {
- if (res.getStatusLine().getStatusCode() != 200) {
- throw new IllegalStateException(res.getStatusLine().toString());
- }
- InputStream content = res.getEntity().getContent();
- String metadataResponse = IOUtils.toString(content);
- metadata = getMetadataFromResponse(metadataResponse);
- } finally {
- if (res.getEntity() != null) {
- EntityUtils.consume(res.getEntity());
- }
- }
- return metadata;
- }
-
- private List<String> getSegmentsFromResponse(String response) {
- String[] allSegments = response.replaceAll("\\[|\\]|\"", "").split(",");
- return Arrays.asList(allSegments);
- }
-
- private Map<String, String> getMetadataFromResponse(String response) {
- Map<String, String> metadata = new HashMap<>();
- String cleanUpResponse = response.replaceAll("\\[|\\]|\"|\\{|\\}|\\\\", "");
- String[] allProperties = cleanUpResponse.replace("state:", "").split(",");
- for (String property : allProperties) {
- String[] tokens = property.split(":", 2);
- metadata.put(tokens[0], tokens[1]);
- }
- return metadata;
- }
-
-}
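Together these calls form the download half of the backfill flow: list the table's segments, filter them by time range, then pull each one to HDFS. A usage sketch from within the same package (the constructor is package-private); host, port, table name, and times are illustrative, and exception handling is elided:

    BackfillControllerAPIs apis = new BackfillControllerAPIs("localhost", 9000, "thirdeyeTable");
    List<String> all = apis.getAllSegments("thirdeyeTable");
    List<String> inRange = apis.findSegmentsInRange("thirdeyeTable", all, 1504224000L, 1504310400L);
    for (String segmentName : inRange) {
      apis.downloadSegment(segmentName, new Path("hdfs://namenode/thirdeye/backfill/download"));
    }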
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseConstants.java
deleted file mode 100644
index d9c4609..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseConstants.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-/**
- * Property keys to be set for the backfill phase
- */
-public enum BackfillPhaseConstants {
-
- BACKFILL_PHASE_CONTROLLER_HOST("backfill.phase.controller.host"),
- BACKFILL_PHASE_CONTROLLER_PORT("backfill.phase.controller.port"),
- BACKFILL_PHASE_START_TIME("backfill.phase.start.time"),
- BACKFILL_PHASE_END_TIME("backfill.phase.end.time"),
- BACKFILL_PHASE_TABLE_NAME("backfill.phase.table.name"),
- BACKFILL_PHASE_OUTPUT_PATH("backfill.phase.output.path");
-
- private final String name;
-
- BackfillPhaseConstants(String name) {
- this.name = name;
- }
-
- @Override
- public String toString() {
- return name;
- }
-
-}
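These keys are consumed from the job's config.properties by the BackfillPhaseJob that follows. A hypothetical configuration (all values illustrative; per the TODO in BackfillControllerAPIs, the start/end times must be in the same time unit the segment metadata uses):

    backfill.phase.controller.host=pinot-controller.example.com
    backfill.phase.controller.port=9000
    backfill.phase.start.time=1504224000000
    backfill.phase.end.time=1504310400000
    backfill.phase.table.name=thirdeyeTable
    backfill.phase.output.path=hdfs://namenode/thirdeye/backfill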
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseJob.java
deleted file mode 100644
index b3a762e..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseJob.java
+++ /dev/null
@@ -1,203 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-import static com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;
-
-import java.io.FileInputStream;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import com.google.common.collect.Lists;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This phase backfills segments which are already present in Pinot with star-tree and topk information.
- * The Pinot segments are downloaded from the table and converted to Avro files.
- * These Avro files are then passed on to the rest of the thirdeye-hadoop segment generation pipeline.
- */
-public class BackfillPhaseJob extends Configured {
- private static final Logger LOGGER = LoggerFactory.getLogger(BackfillPhaseJob.class);
- private static final String DOWNLOAD = "download";
- private static final String INPUT = "input";
- private static final String OUTPUT = "output";
-
- private String name;
- private Properties props;
-
- /**
- * @param name job name
- * @param props job properties
- */
- public BackfillPhaseJob(String name, Properties props) {
- super(new Configuration());
- getConf().set("mapreduce.job.user.classpath.first", "true");
- this.name = name;
- this.props = props;
- }
-
- public Job run() throws Exception {
-
- Job job = Job.getInstance(getConf());
- job.setJarByClass(BackfillPhaseJob.class);
- job.setJobName(name);
-
- FileSystem fs = FileSystem.get(getConf());
- Configuration configuration = job.getConfiguration();
-
- LOGGER.info("*******************************************************************************");
- String controllerHost = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_HOST);
- String controllerPort = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_PORT);
- LOGGER.info("Controller Host : {} Controller Port : {}", controllerHost, controllerPort);
- String segmentStartTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_START_TIME);
- String segmentEndTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_END_TIME);
- long startTime = Long.parseLong(segmentStartTime);
- long endTime = Long.parseLong(segmentEndTime);
- if (startTime > endTime) {
- throw new IllegalStateException("Start time cannot be greater than end time");
- }
- String tableName = getAndSetConfiguration(configuration, BACKFILL_PHASE_TABLE_NAME);
- LOGGER.info("Start time : {} End time : {} Table name : {}", segmentStartTime, segmentEndTime, tableName);
-
- String outputPath = getAndSetConfiguration(configuration, BACKFILL_PHASE_OUTPUT_PATH);
- LOGGER.info("Output path : {}", outputPath);
- Path backfillDir = new Path(outputPath);
- if (fs.exists(backfillDir)) {
- LOGGER.warn("Found the output folder deleting it");
- fs.delete(backfillDir, true);
- }
- Path downloadDir = new Path(backfillDir, DOWNLOAD);
- LOGGER.info("Creating download dir : {}", downloadDir);
- fs.mkdirs(downloadDir);
- Path inputDir = new Path(backfillDir, INPUT);
- LOGGER.info("Creating input dir : {}", inputDir);
- fs.mkdirs(inputDir);
- Path outputDir = new Path(backfillDir, OUTPUT);
- LOGGER.info("Creating output dir : {}", outputDir);
-
- BackfillControllerAPIs backfillControllerAPIs = new BackfillControllerAPIs(controllerHost,
- Integer.valueOf(controllerPort), tableName);
-
- LOGGER.info("Downloading segments in range {} to {}", startTime, endTime);
- List<String> allSegments = backfillControllerAPIs.getAllSegments(tableName);
- List<String> segmentsToDownload = backfillControllerAPIs.findSegmentsInRange(tableName, allSegments, startTime, endTime);
- for (String segmentName : segmentsToDownload) {
- backfillControllerAPIs.downloadSegment(segmentName, downloadDir);
- }
-
- LOGGER.info("Reading downloaded segment input files");
- List<FileStatus> inputDataFiles = new ArrayList<>();
- inputDataFiles.addAll(Lists.newArrayList(fs.listStatus(downloadDir)));
- LOGGER.info("size {}", inputDataFiles.size());
-
- try {
- LOGGER.info("Creating input files at {} for segment input files", inputDir);
- for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
- FileStatus file = inputDataFiles.get(seqId);
- String completeFilePath = " " + file.getPath().toString() + " " + seqId;
- Path newOutPutFile = new Path((inputDir + "/" + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
- FSDataOutputStream stream = fs.create(newOutPutFile);
- LOGGER.info("wrote {}", completeFilePath);
- stream.writeUTF(completeFilePath);
- stream.flush();
- stream.close();
- }
- } catch (Exception e) {
- LOGGER.error("Exception while reading input files ", e);
- }
-
- job.setMapperClass(BackfillPhaseMapJob.BackfillMapper.class);
-
- if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
- job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
- }
-
- job.setInputFormatClass(TextInputFormat.class);
- job.setOutputFormatClass(TextOutputFormat.class);
-
- job.setMapOutputKeyClass(LongWritable.class);
- job.setMapOutputValueClass(Text.class);
-
- FileInputFormat.addInputPath(job, inputDir);
- FileOutputFormat.setOutputPath(job, outputDir);
-
- job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
- job.setMaxReduceAttempts(1);
- job.setMaxMapAttempts(0);
- job.setNumReduceTasks(0);
-
- for (Object key : props.keySet()) {
- job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
- }
-
- job.waitForCompletion(true);
- if (!job.isSuccessful()) {
- throw new RuntimeException("Job failed : " + job);
- }
-
- LOGGER.info("Cleanup the working directory");
- LOGGER.info("Deleting the dir: {}", downloadDir);
- fs.delete(downloadDir, true);
- LOGGER.info("Deleting the dir: {}", inputDir);
- fs.delete(inputDir, true);
- LOGGER.info("Deleting the dir: {}", outputDir);
- fs.delete(outputDir, true);
-
- return job;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
- throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- private String getAndSetConfiguration(Configuration configuration, BackfillPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 1) {
- throw new IllegalArgumentException("usage: config.properties");
- }
- Properties props = new Properties();
- props.load(new FileInputStream(args[0]));
- BackfillPhaseJob job = new BackfillPhaseJob("backfill_job", props);
- job.run();
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java
deleted file mode 100644
index 878a727..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-import com.linkedin.pinot.core.data.GenericRow;
-import com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-import java.io.File;
-import java.io.IOException;
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;
-
-/**
- * Mapper class for the backfill job, which converts Pinot segments to Avro files
- */
-public class BackfillPhaseMapJob {
-
- public static class BackfillMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
- private static final Logger LOGGER = LoggerFactory.getLogger(BackfillPhaseMapJob.class);
-
- private Configuration properties;
-
- private String inputPath;
- private String outputPath;
- private String currentDiskWorkDir;
- private FileSystem fs;
-
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
-
- currentDiskWorkDir = "pinot_hadoop_tmp";
- new File(currentDiskWorkDir).mkdir();
-
- LOGGER.info("*********************************************************************");
- LOGGER.info("Configurations : {}", context.getConfiguration().toString());
- LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
- LOGGER.info("*********************************************************************");
-
- properties = context.getConfiguration();
- fs = FileSystem.get(new Configuration());
-
- outputPath = properties.get(BACKFILL_PHASE_OUTPUT_PATH.toString());
- }
-
- @Override
- protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-
- String line = value.toString();
- String[] lineSplits = line.split(" ");
-
- LOGGER.info("*********************************************************************");
- LOGGER.info("mapper input : {}", value);
- LOGGER.info("Path to output : {}", outputPath);
- LOGGER.info("num lines : {}", lineSplits.length);
-
- for (String split : lineSplits) {
- LOGGER.info("Command line : {}", split);
- }
- if (lineSplits.length != 3) {
- throw new RuntimeException("Input to the mapper is malformed");
- }
- inputPath = lineSplits[1].trim();
-
- LOGGER.info("input data file path : {}", inputPath);
- LOGGER.info("*********************************************************************");
-
- try {
- createAvro(inputPath);
- LOGGER.info("Finished avro creation job successfully");
- } catch (Exception e) {
- LOGGER.error("Got exception while creating avro!", e);
- }
- }
-
- private void createAvro(String dataFilePath) throws Exception {
-
- Path hdfsDataPath = new Path(dataFilePath);
- File dataPath = new File(currentDiskWorkDir, "data");
- if (dataPath.exists()) {
- FileUtils.deleteDirectory(dataPath);
- }
- dataPath.mkdir();
- LOGGER.info("Creating temporary data dir {}", dataPath);
-
- final File avroPath = new File(currentDiskWorkDir, "avro");
- if (avroPath.exists()) {
- FileUtils.deleteDirectory(avroPath);
- }
- avroPath.mkdir();
- LOGGER.info("Creating temporary avro dir {}", avroPath);
-
- String segmentName = hdfsDataPath.getName();
- final Path localFilePath = new Path(dataPath + "/" + segmentName);
- fs.copyToLocalFile(hdfsDataPath, localFilePath);
- LOGGER.info("Copying segment {} from {} to local {}", segmentName, hdfsDataPath, localFilePath);
- File segmentIndexDir = new File(localFilePath.toString());
- if (!segmentIndexDir.exists()) {
- throw new IllegalStateException("Failed to copy " + hdfsDataPath + " to " + localFilePath);
- }
-
- LOGGER.info("Initializing PinotSegmentRecordReader with segment index dir {}", segmentIndexDir);
- PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(segmentIndexDir);
- LOGGER.info("Schema {}", pinotSegmentRecordReader.getSchema());
-
- Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSegmentRecordReader.getSchema());
- GenericDatumWriter<GenericRecord> datum = new GenericDatumWriter<GenericRecord>(avroSchema);
- DataFileWriter<GenericRecord> recordWriter = new DataFileWriter<GenericRecord>(datum);
- File localAvroFile = new File(avroPath, segmentName + ThirdEyeConstants.AVRO_SUFFIX);
- recordWriter.create(avroSchema, localAvroFile);
-
- LOGGER.info("Converting pinot segment to avro at {}", localAvroFile);
- while (pinotSegmentRecordReader.hasNext()) {
- GenericRecord outputRecord = new Record(avroSchema);
- GenericRow row = pinotSegmentRecordReader.next();
- for (String fieldName : row.getFieldNames()) {
- outputRecord.put(fieldName, row.getValue(fieldName));
- }
- recordWriter.append(outputRecord);
- }
- LOGGER.info("Writing to avro file at {}", localAvroFile);
- recordWriter.close();
- if (!localAvroFile.exists()) {
- throw new IllegalStateException("Failed to write avro file to " + localAvroFile);
- }
- pinotSegmentRecordReader.close();
-
- LOGGER.info("Coping avro file from {} to hdfs at {}", localAvroFile, outputPath);
- fs.copyFromLocalFile(true, true, new Path(localAvroFile.toString()), new Path(outputPath));
- if (!fs.exists(new Path(outputPath))) {
- throw new IllegalStateException("Failed to copy avro file to hdfs at " + outputPath );
- }
- LOGGER.info("Successfully copied {} to {}", localAvroFile, outputPath);
- }
- }
-}
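The mapper depends on the staging-file format written by BackfillPhaseJob: each input file holds a single writeUTF record of the form " <segmentPath> <seqId>", so splitting on spaces appears to yield three tokens, of which only the middle one is used. A sketch of the contract, with a hypothetical path:

    // The leading token absorbs the writeUTF length prefix and the deliberate
    // leading space; lineSplits[1] is the segment path, lineSplits[2] the seqId.
    String[] lineSplits = value.toString().split(" ");
    String segmentPath = lineSplits[1].trim();  // e.g. hdfs://namenode/thirdeye/backfill/download/segment_0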
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionSpec.java
deleted file mode 100644
index 5834765..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionSpec.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Objects;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- * Class for representing a dimension spec
- * @param name - dimension name
- * @param dimensionType - dimension data type
-public class DimensionSpec {
- private String name;
- private DimensionType dimensionType;
-
- public DimensionSpec() {
- }
-
-
- public DimensionSpec(String name, DimensionType dimensionType) {
- this.name = name;
- this.dimensionType = dimensionType;
- }
-
- @JsonProperty
- public String getName() {
- return name;
- }
-
- @JsonProperty
- public DimensionType getDimensionType() {
- return dimensionType;
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof DimensionSpec)) {
- return false;
- }
- DimensionSpec d = (DimensionSpec) o;
-
- return Objects.equals(d.getName(), name) && Objects.equals(d.getDimensionType(), dimensionType);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(name, dimensionType);
- }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionType.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionType.java
deleted file mode 100644
index 0efaa0d..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionType.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-
-/**
- * Represents the various data types supported for a dimension<br/>
- * Currently we support INT, SHORT, LONG, FLOAT, DOUBLE, STRING
- */
-public enum DimensionType {
- INT {
- @Override
- public Object getValueFromString(String strVal) {
- return Integer.valueOf(strVal);
- }
-
- @Override
- public Object getDefaultNullvalue() {
- return ThirdEyeConstants.EMPTY_INT;
- }
-
- @Override
- public Object getDefaultOtherValue() {
- return ThirdEyeConstants.EMPTY_INT;
- }
- },
- SHORT {
- @Override
- public Object getValueFromString(String strVal) {
- return Short.valueOf(strVal);
- }
-
- @Override
- public Object getDefaultNullvalue() {
- return ThirdEyeConstants.EMPTY_SHORT;
- }
-
- @Override
- public Object getDefaultOtherValue() {
- return ThirdEyeConstants.EMPTY_SHORT;
- }
- },
- LONG {
- @Override
- public Object getValueFromString(String strVal) {
- return Long.valueOf(strVal);
- }
-
- @Override
- public Object getDefaultNullvalue() {
- return ThirdEyeConstants.EMPTY_LONG;
- }
-
- @Override
- public Object getDefaultOtherValue() {
- return ThirdEyeConstants.EMPTY_LONG;
- }
- },
- FLOAT {
- @Override
- public Object getValueFromString(String strVal) {
- return Float.valueOf(strVal);
- }
-
- @Override
- public Object getDefaultNullvalue() {
- return ThirdEyeConstants.EMPTY_FLOAT;
- }
-
- @Override
- public Object getDefaultOtherValue() {
- return ThirdEyeConstants.EMPTY_FLOAT;
- }
- },
- DOUBLE {
- @Override
- public Object getValueFromString(String strVal) {
- return Double.valueOf(strVal);
- }
-
- @Override
- public Object getDefaultNullvalue() {
- return ThirdEyeConstants.EMPTY_DOUBLE;
- }
-
- @Override
- public Object getDefaultOtherValue() {
- return ThirdEyeConstants.EMPTY_DOUBLE;
- }
- },
- STRING {
- @Override
- public Object getValueFromString(String strVal) {
- return strVal;
- }
-
- @Override
- public Object getDefaultNullvalue() {
- return ThirdEyeConstants.EMPTY_STRING;
- }
-
- @Override
- public Object getDefaultOtherValue() {
- return ThirdEyeConstants.OTHER;
- }
- };
-
-
- public abstract Object getValueFromString(String strVal);
-
- public abstract Object getDefaultNullvalue();
-
- public abstract Object getDefaultOtherValue();
-
-
- /**
- * Writes a dimension value to a data output stream
- * @param dos DataOutputStream to write to
- * @param dimensionValue the value to write
- * @param dimensionType the type used to encode the value
- * @throws IOException if writing fails
- */
- public static void writeDimensionValueToOutputStream(DataOutputStream dos, Object dimensionValue,
- DimensionType dimensionType) throws IOException {
- switch (dimensionType) {
- case DOUBLE:
- dos.writeDouble((double) dimensionValue);
- break;
- case FLOAT:
- dos.writeFloat((float) dimensionValue);
- break;
- case INT:
- dos.writeInt((int) dimensionValue);
- break;
- case LONG:
- dos.writeLong((long) dimensionValue);
- break;
- case SHORT:
- dos.writeShort((short) dimensionValue);
- break;
- case STRING:
- String stringVal = (String) dimensionValue;
- byte[] bytes = stringVal.getBytes(StandardCharsets.UTF_8);
- dos.writeInt(bytes.length);
- dos.write(bytes);
- break;
- default:
- throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
- }
- }
-
- /**
- * Reads a dimension value from a given data input stream
- * @param dis DataInputStream to read from
- * @param dimensionType the type used to decode the value
- * @return the decoded dimension value
- * @throws IOException if reading fails
- */
- public static Object readDimensionValueFromDataInputStream(DataInputStream dis, DimensionType dimensionType) throws IOException {
- Object dimensionValue = null;
- switch (dimensionType) {
- case DOUBLE:
- dimensionValue = dis.readDouble();
- break;
- case FLOAT:
- dimensionValue = dis.readFloat();
- break;
- case INT:
- dimensionValue = dis.readInt();
- break;
- case SHORT:
- dimensionValue = dis.readShort();
- break;
- case LONG:
- dimensionValue = dis.readLong();
- break;
- case STRING:
- int length = dis.readInt();
- byte[] bytes = new byte[length];
- dis.readFully(bytes);
- dimensionValue = new String(bytes, StandardCharsets.UTF_8);
- break;
- default:
- throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
- }
- return dimensionValue;
- }
-
-}
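A short sketch of how these hooks compose, parsing a raw string with getValueFromString and then round-tripping it through the stream helpers (type and value illustrative, java.io stream imports assumed):

    DimensionType type = DimensionType.LONG;
    Object value = type.getValueFromString("1234");  // Long 1234

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    DimensionType.writeDimensionValueToOutputStream(dos, value, type);
    dos.close();

    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    Object decoded = DimensionType.readDimensionValueFromDataInputStream(dis, type);  // Long 1234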
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricSpec.java
deleted file mode 100644
index 340048b..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricSpec.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Objects;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- * Class for representing metric specs
- * @param name - metric name
- * @param type - metric type
- */
-public class MetricSpec {
- private String name;
- private MetricType type;
-
- public MetricSpec() {
- }
-
- public MetricSpec(String name, MetricType type) {
- this.name = name;
- this.type = type;
- }
-
- @JsonProperty
- public String getName() {
- return name;
- }
-
- @JsonProperty
- public MetricType getType() {
- return type;
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof MetricSpec)) {
- return false;
- }
-
- MetricSpec m = (MetricSpec) o;
-
- return Objects.equals(name, m.getName()) && Objects.equals(type, m.getType());
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(name, type);
- }
-}
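Because both spec classes expose their fields through @JsonProperty getters, they deserialize directly from config documents. A hypothetical fragment of the YAML form that ThirdEyeConfig (further below) reads and writes, with illustrative names:

    dimensions:
      - name: countryCode
        dimensionType: STRING
    metrics:
      - name: pageViews
        type: LONG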
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricType.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricType.java
deleted file mode 100644
index bb4c0ff..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricType.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-/**
- * Represents the various data types supported for a metric<br/>
- * Currently we support INT, SHORT, LONG, FLOAT, DOUBLE
- */
-public enum MetricType {
-
- INT {
- public Number toNumber(String s) {
- return Integer.parseInt(s);
- }
-
- public int byteSize() {
- return 4;
- }
-
- @Override
- public Number getDefaultNullValue() {
- return ThirdEyeConstants.EMPTY_INT;
- }
-
- },
- SHORT {
- public Number toNumber(String s) {
- return Short.parseShort(s);
- }
-
- public int byteSize() {
- return 2;
-
- }
-
- @Override
- public Number getDefaultNullValue() {
- return ThirdEyeConstants.EMPTY_SHORT;
- }
-
- },
- LONG {
- public Number toNumber(String s) {
- return Long.parseLong(s);
- }
-
- public int byteSize() {
- return 8;
-
- }
-
- @Override
- public Number getDefaultNullValue() {
- return ThirdEyeConstants.EMPTY_LONG;
- }
-
- },
- FLOAT {
- public Number toNumber(String s) {
- return Float.parseFloat(s);
- }
-
- public int byteSize() {
- return 4;
-
- }
-
- @Override
- public Number getDefaultNullValue() {
- return ThirdEyeConstants.EMPTY_FLOAT;
- }
-
- },
- DOUBLE {
- public Number toNumber(String s) {
- return Double.parseDouble(s);
- }
-
- public int byteSize() {
- return 8;
- }
-
- @Override
- public Number getDefaultNullValue() {
- return ThirdEyeConstants.EMPTY_DOUBLE;
- }
- };
-
- public abstract Number toNumber(String s);
-
- public abstract int byteSize();
-
- public abstract Number getDefaultNullValue();
-
- /**
- * Writes a metric value to a data output stream
- * @param dos DataOutputStream to write to
- * @param number the metric value to write
- * @param metricType the type used to encode the value
- * @throws IOException if writing fails
- */
- public static void writeMetricValueToDataOutputStream(DataOutputStream dos, Number number, MetricType metricType) throws IOException {
- switch (metricType) {
- case SHORT:
- dos.writeShort(number.intValue());
- break;
- case LONG:
- dos.writeLong(number.longValue());
- break;
- case INT:
- dos.writeInt(number.intValue());
- break;
- case FLOAT:
- dos.writeFloat(number.floatValue());
- break;
- case DOUBLE:
- dos.writeDouble(number.doubleValue());
- break;
- default:
- throw new IllegalArgumentException("Unsupported metricType " + metricType);
- }
- }
-
- /**
- * Reads a metric value from a data input stream
- * @param dis DataInputStream to read from
- * @param metricType the type used to decode the value
- * @return the decoded metric value
- * @throws IOException if reading fails
- */
- public static Number readMetricValueFromDataInputStream(DataInputStream dis, MetricType metricType) throws IOException {
- Number metricValue = null;
- switch (metricType) {
- case SHORT:
- metricValue = dis.readShort();
- break;
- case LONG:
- metricValue = dis.readLong();
- break;
- case INT:
- metricValue = dis.readInt();
- break;
- case FLOAT:
- metricValue = dis.readFloat();
- break;
- case DOUBLE:
- metricValue = dis.readDouble();
- break;
- default:
- throw new IllegalArgumentException("Unsupported metricType " + metricType);
- }
- return metricValue;
- }
-}
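toNumber and the stream helpers are the metric-side counterparts of the DimensionType hooks above. A brief sketch (type and value illustrative, java.io stream imports assumed):

    MetricType type = MetricType.DOUBLE;
    Number n = type.toNumber("2.5");  // Double 2.5, occupying byteSize() == 8 bytes on the wire

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    MetricType.writeMetricValueToDataOutputStream(dos, n, type);
    dos.close();

    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
    Number decoded = MetricType.readMetricValueFromDataInputStream(dis, type);  // 2.5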
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/SplitSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/SplitSpec.java
deleted file mode 100644
index a2c353e..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/SplitSpec.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * Class for representing split spec
- * @param threshold - threshold after which to stop splitting on a node in star tree
- * @param order - order in which dimensions should be chosen to split in star tree creation
- */
-public class SplitSpec {
- private int threshold = 1000;
- private List<String> order;
-
- public SplitSpec() {
- }
-
- public SplitSpec(int threshold, List<String> order) {
- this.threshold = threshold;
- this.order = order;
- }
-
- @JsonProperty
- public int getThreshold() {
- return threshold;
- }
-
- @JsonProperty
- public List<String> getOrder() {
- return order;
- }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfig.java
deleted file mode 100644
index 7f5f383..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfig.java
+++ /dev/null
@@ -1,479 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.commons.lang.StringUtils;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
-import com.google.common.collect.Lists;
-import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
-
-/**
- * This class represents the configs required by the thirdeye-hadoop jobs
- * @param collection - name of the pinot table
- * @param dimensions - list of dimensionSpecs for dimensions
- * @param metrics - list of metricSpecs for metrics
- * @param time - time spec
- * @param topKWhitelist - metric threshold, topk and whitelist spec
- * @param split - split spec
- */
-public final class ThirdEyeConfig {
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());
- private static final String FIELD_SEPARATOR = ",";
- private static final String CONFIG_JOINER = ".";
- private static final String DEFAULT_TIME_TYPE = "HOURS";
- private static final String DEFAULT_TIME_SIZE = "1";
- private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString();
-
- private String collection;
- private List<DimensionSpec> dimensions;
- private List<MetricSpec> metrics;
- private TimeSpec inputTime = new TimeSpec();
- private TimeSpec time = new TimeSpec();
- private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec();
- private SplitSpec split = new SplitSpec();
-
- public ThirdEyeConfig() {
- }
-
- public ThirdEyeConfig(String collection, List<DimensionSpec> dimensions,
- List<MetricSpec> metrics, TimeSpec inputTime, TimeSpec time, TopkWhitelistSpec topKWhitelist, SplitSpec split) {
- this.collection = collection;
- this.dimensions = dimensions;
- this.metrics = metrics;
- this.inputTime = inputTime;
- this.time = time;
- this.topKWhitelist = topKWhitelist;
- this.split = split;
- }
-
- public String getCollection() {
- return collection;
- }
-
- public List<DimensionSpec> getDimensions() {
- return dimensions;
- }
-
- @JsonIgnore
- public List<String> getDimensionNames() {
- List<String> results = new ArrayList<>(dimensions.size());
- for (DimensionSpec dimensionSpec : dimensions) {
- results.add(dimensionSpec.getName());
- }
- return results;
- }
-
- public List<MetricSpec> getMetrics() {
- return metrics;
- }
-
- @JsonIgnore
- public List<String> getMetricNames() {
- List<String> results = new ArrayList<>(metrics.size());
- for (MetricSpec metricSpec : metrics) {
- results.add(metricSpec.getName());
- }
- return results;
- }
-
- public TimeSpec getInputTime() {
- return inputTime;
- }
-
- public TimeSpec getTime() {
- return time;
- }
-
- public TopkWhitelistSpec getTopKWhitelist() {
- return topKWhitelist;
- }
-
- /**
- * Returns a set of all dimensions which have a topk config
- * @return set of dimension names with topk specs
- */
- @JsonIgnore
- public Set<String> getTransformDimensions() {
- Set<String> transformDimensions = new HashSet<>();
-
- if (topKWhitelist != null) {
- List<TopKDimensionToMetricsSpec> topk = topKWhitelist.getTopKDimensionToMetricsSpec();
- if (topk != null) {
- for (TopKDimensionToMetricsSpec spec : topk) {
- transformDimensions.add(spec.getDimensionName());
- }
- }
- }
- return transformDimensions;
- }
-
- public SplitSpec getSplit() {
- return split;
- }
-
- public String encode() throws IOException {
- return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this);
- }
-
- public static class Builder {
- private String collection;
- private List<DimensionSpec> dimensions;
- private List<MetricSpec> metrics;
- private TimeSpec inputTime = new TimeSpec();
- private TimeSpec time = new TimeSpec();
- private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec();
- private SplitSpec split = new SplitSpec();
-
- public String getCollection() {
- return collection;
- }
-
- public Builder setCollection(String collection) {
- this.collection = collection;
- return this;
- }
-
- public List<DimensionSpec> getDimensions() {
- return dimensions;
- }
-
- public Builder setDimensions(List<DimensionSpec> dimensions) {
- this.dimensions = dimensions;
- return this;
- }
-
- public List<MetricSpec> getMetrics() {
- return metrics;
- }
-
- public Builder setMetrics(List<MetricSpec> metrics) {
- this.metrics = metrics;
- return this;
- }
-
- public TimeSpec getInputTime() {
- return inputTime;
- }
-
- public TimeSpec getTime() {
- return time;
- }
-
- public Builder setTime(TimeSpec time) {
- this.time = time;
- return this;
- }
-
- public TopkWhitelistSpec getTopKWhitelist() {
- return topKWhitelist;
- }
-
- public Builder setTopKWhitelist(TopkWhitelistSpec topKWhitelist) {
- this.topKWhitelist = topKWhitelist;
- return this;
- }
-
- public SplitSpec getSplit() {
- return split;
- }
-
- public Builder setSplit(SplitSpec split) {
- this.split = split;
- return this;
- }
-
- public ThirdEyeConfig build() throws Exception {
- if (collection == null) {
- throw new IllegalArgumentException("Must provide collection");
- }
-
- if (dimensions == null || dimensions.isEmpty()) {
- throw new IllegalArgumentException("Must provide dimension names");
- }
-
- if (metrics == null || metrics.isEmpty()) {
- throw new IllegalArgumentException("Must provide metric specs");
- }
-
- return new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split);
- }
- }
-
- public static ThirdEyeConfig decode(InputStream inputStream) throws IOException {
- return OBJECT_MAPPER.readValue(inputStream, ThirdEyeConfig.class);
- }
-
- /**
- * Creates a ThirdEyeConfig object from the Properties object
- * @param props properties defining the config
- * @return the ThirdEyeConfig constructed from the properties
- */
- public static ThirdEyeConfig fromProperties(Properties props) {
-
- String collection = getCollectionFromProperties(props);
- List<DimensionSpec> dimensions = getDimensionFromProperties(props);
- List<MetricSpec> metrics = getMetricsFromProperties(props);
- TimeSpec inputTime = getInputTimeFromProperties(props);
- TimeSpec time = getTimeFromProperties(props);
- SplitSpec split = getSplitFromProperties(props);
- TopkWhitelistSpec topKWhitelist = getTopKWhitelistFromProperties(props);
- return new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split);
- }
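A sketch of driving fromProperties directly. Only keys already shown in this class are used; the required collection, dimension, and metric keys from ThirdEyeConfigProperties are assumed to be set in the same way, and all values are illustrative:

    Properties props = new Properties();
    // ... collection, dimension, and metric properties assumed set here ...
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), "1000");
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), "countryCode,browser");
    ThirdEyeConfig config = ThirdEyeConfig.fromProperties(props);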
-
- private static TopkWhitelistSpec getTopKWhitelistFromProperties(Properties props) {
- TopkWhitelistSpec topKWhitelist = null;
-
- Map<String, Double> threshold = getThresholdFromProperties(props);
- List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = getTopKDimensionToMetricsSpecFromProperties(props);
- Map<String, List<String>> whitelist = getWhitelistFromProperties(props);
- Map<String, String> nonWhitelistValue = getNonWhitelistValueFromProperties(props);
-
- if (threshold != null || topKDimensionToMetricsSpec != null || whitelist != null) {
- topKWhitelist = new TopkWhitelistSpec();
- topKWhitelist.setThreshold(threshold);
- topKWhitelist.setTopKDimensionToMetricsSpec(topKDimensionToMetricsSpec);
- topKWhitelist.setWhitelist(whitelist);
- topKWhitelist.setNonWhitelistValue(nonWhitelistValue);
- }
- return topKWhitelist;
- }
-
- /**
- * Creates a map of dimension name to the value that should be used for "others"
- * @param props properties defining the whitelist configs
- * @return map of whitelist dimension name to its non-whitelist ("other") value
- */
- private static Map<String, String> getNonWhitelistValueFromProperties(Properties props) {
- Map<String, String> dimensionToNonWhitelistValueMap = null;
-
- // create dimension to type map
- List<DimensionSpec> dimensions = getDimensionFromProperties(props);
- Map<String, DimensionType> dimensionToType = new HashMap<>();
- for (int i = 0; i < dimensions.size(); i ++) {
- DimensionSpec spec = dimensions.get(i);
- dimensionToType.put(spec.getName(), spec.getDimensionType());
- }
-
- // dimensions with whitelist
- String whitelistDimensionsStr = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null);
- List<String> whitelistDimensions = new ArrayList<>();
- if (StringUtils.isNotBlank(whitelistDimensionsStr)) {
- dimensionToNonWhitelistValueMap = new HashMap<>();
- whitelistDimensions.addAll(Lists.newArrayList(whitelistDimensionsStr.split(FIELD_SEPARATOR)));
- }
-
- for (String whitelistDimension : whitelistDimensions) {
- String nonWhitelistValue = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_NONWHITELIST_VALUE_DIMENSION.toString() + CONFIG_JOINER + whitelistDimension, null);
- if (StringUtils.isNotBlank(nonWhitelistValue)) {
- dimensionToNonWhitelistValueMap.put(whitelistDimension, nonWhitelistValue);
- } else {
- dimensionToNonWhitelistValueMap.put(whitelistDimension, String.valueOf(dimensionToType.get(whitelistDimension).getDefaultOtherValue()));
- }
- }
- return dimensionToNonWhitelistValueMap;
- }
-
-
- private static Map<String, List<String>> getWhitelistFromProperties(Properties props) {
- Map<String, List<String>> whitelist = null;
- String whitelistDimensionsStr = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null);
- if (StringUtils.isNotBlank(whitelistDimensionsStr)) {
- whitelist = new HashMap<>();
- for (String dimension : whitelistDimensionsStr.split(FIELD_SEPARATOR)) {
- String whitelistValuesStr = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + CONFIG_JOINER + dimension);
- String[] whitelistValues = whitelistValuesStr.split(FIELD_SEPARATOR);
- List<String> whitelistValuesList = Lists.newArrayList(whitelistValues);
- whitelist.put(dimension, whitelistValuesList);
- }
- }
- return whitelist;
- }
-
- private static List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpecFromProperties(Properties props) {
- List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = null;
- String topKDimensionNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), null);
- if (StringUtils.isNotEmpty(topKDimensionNames)) {
- topKDimensionToMetricsSpec = new ArrayList<>();
- for (String dimension : topKDimensionNames.split(FIELD_SEPARATOR)) {
- String[] topKDimensionMetrics = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + CONFIG_JOINER + dimension)
- .split(FIELD_SEPARATOR);
- String[] topKDimensionKValues = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + CONFIG_JOINER + dimension)
- .split(FIELD_SEPARATOR);
- if (topKDimensionMetrics.length != topKDimensionKValues.length) {
- throw new IllegalStateException("Number of topk metric names and kvalues should be same for a dimension");
- }
- Map<String, Integer> topk = new HashMap<>();
- for (int i = 0; i < topKDimensionMetrics.length; i++) {
- topk.put(topKDimensionMetrics[i], Integer.parseInt(topKDimensionKValues[i]));
- }
- topKDimensionToMetricsSpec.add(new TopKDimensionToMetricsSpec(dimension, topk));
- }
- }
- return topKDimensionToMetricsSpec;
- }
-
- private static Map<String, Double> getThresholdFromProperties(Properties props) {
- Map<String, Double> threshold = null;
- String thresholdMetricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString(), null);
- String metricThresholdValues = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), null);
- if (thresholdMetricNames != null && metricThresholdValues != null) {
- String[] thresholdMetrics = thresholdMetricNames.split(FIELD_SEPARATOR);
- String[] thresholdValues = metricThresholdValues.split(FIELD_SEPARATOR);
- if (thresholdMetrics.length != thresholdValues.length) {
- throw new IllegalStateException("Number of threshold metric names should be same as threshold values");
- }
- threshold = new HashMap<>();
- for (int i = 0; i < thresholdMetrics.length; i++) {
- threshold.put(thresholdMetrics[i], Double.parseDouble(thresholdValues[i]));
- }
- }
- return threshold;
- }
-
- private static SplitSpec getSplitFromProperties(Properties props) {
- SplitSpec split = null;
- String splitThreshold = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), null);
- if (splitThreshold != null) {
- String splitOrder = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), null);
- List<String> splitOrderList = null;
- if (splitOrder != null) {
- splitOrderList = Arrays.asList(splitOrder.split(FIELD_SEPARATOR));
- }
- split = new SplitSpec(Integer.parseInt(splitThreshold), splitOrderList);
- }
- return split;
- }
-
- private static TimeSpec getTimeFromProperties(Properties props) {
- String timeColumnName = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
- String timeColumnType = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString(), DEFAULT_TIME_TYPE);
- String timeColumnSize = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString(), DEFAULT_TIME_SIZE);
- TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType));
- String timeFormat = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT);
- TimeSpec time = new TimeSpec(timeColumnName, timeGranularity, timeFormat);
- return time;
- }
-
-
- private static TimeSpec getInputTimeFromProperties(Properties props) {
- TimeSpec inputTime = null;
- String timeColumnName = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
- String timeColumnType = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), null);
- String timeColumnSize = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), null);
- String timeFormat = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT);
- if (timeColumnType != null && timeColumnSize != null) {
- TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType));
- inputTime = new TimeSpec(timeColumnName, timeGranularity, timeFormat);
- }
- return inputTime;
- }
-
- private static List<MetricSpec> getMetricsFromProperties(Properties props) {
- List<MetricSpec> metrics = new ArrayList<>();
- String[] metricNames = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()).split(FIELD_SEPARATOR);
- String[] metricTypes = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()).split(FIELD_SEPARATOR);
- if (metricNames.length != metricTypes.length) {
- throw new IllegalStateException("Number of metric names provided "
- + "should be same as number of metric types");
- }
- for (int i = 0; i < metricNames.length; i++) {
- metrics.add(new MetricSpec(metricNames[i], MetricType.valueOf(metricTypes[i])));
- }
- return metrics;
- }
-
- private static List<DimensionSpec> getDimensionFromProperties(Properties props) {
- List<DimensionSpec> dimensions = new ArrayList<>();
- String[] dimensionNames = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()).split(FIELD_SEPARATOR);
- String[] dimensionTypes = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString()).split(FIELD_SEPARATOR);
- for (int i = 0; i < dimensionNames.length; i++) {
- dimensions.add(new DimensionSpec(dimensionNames[i], DimensionType.valueOf(dimensionTypes[i])));
- }
- return dimensions;
- }
-
- private static String getCollectionFromProperties(Properties props) {
- String collection = getAndCheck(props,
- ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
- return collection;
- }
-
- private static String getAndCheck(Properties props, String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
- throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- private static String getAndCheck(Properties props, String propName, String defaultValue) {
- String propValue = props.getProperty(propName, defaultValue);
- return propValue;
- }
-
-}
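
For reference, a minimal sketch of the topk/whitelist properties these parsers
consume; the dimension names (d1, d2), metric names (m1, m2), and values are
hypothetical placeholders:

    thirdeye.topk.threshold.metric.names=m1
    thirdeye.topk.metric.threshold.values=0.01
    thirdeye.topk.dimension.names=d1
    thirdeye.topk.metrics.d1=m1,m2
    thirdeye.topk.kvalues.d1=20,10
    thirdeye.whitelist.dimension.names=d2
    thirdeye.whitelist.dimension.d2=us,uk
    thirdeye.nonwhitelist.value.dimension.d2=other

With these, getThresholdFromProperties maps m1 to 0.01, and
getTopKDimensionToMetricsSpecFromProperties keeps the top 20 values of d1 by m1
and the top 10 by m2.
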
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigProperties.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigProperties.java
deleted file mode 100644
index 18be146..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigProperties.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-/**
- * Class for representing all property names used in thirdeye-hadoop jobs
- */
-public enum ThirdEyeConfigProperties {
-
- /** Pinot table name */
- THIRDEYE_TABLE_NAME("thirdeye.table.name"),
-
- /** Comma separated dimension names */
- THIRDEYE_DIMENSION_NAMES("thirdeye.dimension.names"),
-
- /** Comma separated dimension types */
- THIRDEYE_DIMENSION_TYPES("thirdeye.dimension.types"),
-
- /** Comma separated metric names */
- THIRDEYE_METRIC_NAMES("thirdeye.metric.names"),
-
- /** Comma separated metric types */
- THIRDEYE_METRIC_TYPES("thirdeye.metric.types"),
-
- /** Time column name */
- THIRDEYE_TIMECOLUMN_NAME("thirdeye.timecolumn.name"),
-
- /** Time input column type before aggregation (HOURS, DAYS etc) */
- THIRDEYE_INPUT_TIMECOLUMN_TYPE("thirdeye.input.timecolumn.type"),
-
- /** Time input bucket size before aggregation */
- THIRDEYE_INPUT_TIMECOLUMN_SIZE("thirdeye.input.timecolumn.size"),
-
- /** Time format
- * Can be either EPOCH (default) or SIMPLE_DATE_FORMAT:pattern e.g. SIMPLE_DATE_FORMAT:yyyyMMdd */
- THIRDEYE_INPUT_TIMECOLUMN_FORMAT("thirdeye.input.timecolumn.format"),
-
- /** Time column type (HOURS, DAYS etc) */
- THIRDEYE_TIMECOLUMN_TYPE("thirdeye.timecolumn.type"),
-
- /** Time bucket size */
- THIRDEYE_TIMECOLUMN_SIZE("thirdeye.timecolumn.size"),
-
- /** Time format
- * Can be either EPOCH (default) or SIMPLE_DATE_FORMAT:pattern e.g. SIMPLE_DATE_FORMAT:yyyyMMdd */
- THIRDEYE_TIMECOLUMN_FORMAT("thirdeye.timecolumn.format"),
-
- /** Split threshold for star tree */
- THIRDEYE_SPLIT_THRESHOLD("thirdeye.split.threshold"),
-
- /** Split order for star tree */
- THIRDEYE_SPLIT_ORDER("thirdeye.split.order"),
-
- /** Comma separated metric names for threshold filtering */
- THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES("thirdeye.topk.threshold.metric.names"),
-
- /** Comma separated metric threshold values */
- THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES("thirdeye.topk.metric.threshold.values"),
-
- /** Comma separated dimension names for topk config */
- THIRDEYE_TOPK_DIMENSION_NAMES("thirdeye.topk.dimension.names"),
-
- /** Used by appending the dimension name at the end, e.g. thirdeye.topk.metrics.d1
- * Comma separated metrics with topk specification for the given dimension */
- THIRDEYE_TOPK_METRICS("thirdeye.topk.metrics"),
-
- /** Used by appending the dimension name at the end, e.g. thirdeye.topk.kvalues.d1
- * Comma separated top k values for the corresponding metrics for the given dimension */
- THIRDEYE_TOPK_KVALUES("thirdeye.topk.kvalues"),
-
- /** Comma separated dimension names which have whitelist */
- THIRDEYE_WHITELIST_DIMENSION_NAMES("thirdeye.whitelist.dimension.names"),
-
- /** Used by appending the dimension name at the end, e.g. thirdeye.whitelist.dimension.d1
- * Comma separated list of values to whitelist for the given dimension */
- THIRDEYE_WHITELIST_DIMENSION("thirdeye.whitelist.dimension"),
-
- /** Used by appending the dimension name at the end, e.g. thirdeye.nonwhitelist.value.dimension.d1
- * Value to be used for values which are not in the whitelist */
- THIRDEYE_NONWHITELIST_VALUE_DIMENSION("thirdeye.nonwhitelist.value.dimension");
-
- String name;
-
- ThirdEyeConfigProperties(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
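
For orientation, a minimal hypothetical property set covering the required keys
above; all names and values are placeholders:

    thirdeye.table.name=pageViews
    thirdeye.dimension.names=country,browser
    thirdeye.dimension.types=STRING,STRING
    thirdeye.metric.names=views
    thirdeye.metric.types=LONG
    thirdeye.timecolumn.name=hoursSinceEpoch
    thirdeye.timecolumn.type=HOURS
    thirdeye.timecolumn.size=1
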
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConstants.java
deleted file mode 100644
index b088835..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConstants.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.config;
-
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-public final class ThirdEyeConstants {
- public static final String TOPK_VALUES_FILE = "topk_values";
- public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormat.forPattern("yyyy-MM-dd-HHmmss"); // calendar year "yyyy", not weekyear "YYYY", which mis-dates records around New Year
- public static final String TOPK_DIMENSION_SUFFIX = "_topk";
- public static final String OTHER = "other";
- public static final String EMPTY_STRING = "";
- public static final Number EMPTY_NUMBER = 0;
- public static final Double EMPTY_DOUBLE = 0d;
- public static final Float EMPTY_FLOAT = 0f;
- public static final Integer EMPTY_INT = 0;
- public static final Long EMPTY_LONG = 0L;
- public static final Short EMPTY_SHORT = 0;
- public static final String SEGMENT_JOINER = "_";
- public static final String AUTO_METRIC_COUNT = "__COUNT";
- public static final String FIELD_SEPARATOR = ",";
- public static final String TAR_SUFFIX = ".tar.gz";
- public static final String AVRO_SUFFIX = ".avro";
- public static final String SDF_SEPARATOR = ":";
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeGranularity.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeGranularity.java
deleted file mode 100644
index c10e056..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeGranularity.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Objects;
-import java.util.concurrent.TimeUnit;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- * TimeGranularity class contains time unit and time size of the star tree time config
- *
- * unit: the TimeUnit of the column
- * size: the bucket size of the time column
- */
-public class TimeGranularity {
- private static final int DEFAULT_TIME_SIZE = 1;
-
- private int size = DEFAULT_TIME_SIZE;
- private TimeUnit unit;
-
- public TimeGranularity() {
- }
-
- public TimeGranularity(int size, TimeUnit unit) {
- this.size = size;
- this.unit = unit;
- }
-
- @JsonProperty
- public int getSize() {
- return size;
- }
-
- @JsonProperty
- public TimeUnit getUnit() {
- return unit;
- }
-
- public long toMillis() {
- return toMillis(1);
- }
-
- /**
- * Converts time in bucketed units to millis
- *
- * @param time time expressed in buckets of this granularity (e.g. hoursSinceEpoch for 1-HOURS)
- * @return the equivalent duration in milliseconds
- */
- public long toMillis(long time) {
- return unit.toMillis(time * size);
- }
-
- /**
- * Converts millis to time unit
- *
- * e.g. If TimeGranularity is defined as 1 HOURS,
- * and we invoke convertToUnit(1458284400000) (i.e. 2016-03-18 00:00:00)
- * this method will return HOURS.convert(1458284400000, MILLISECONDS)/1 = 405079 hoursSinceEpoch
- *
- * If TimeGranularity is defined as 10 MINUTES,
- * and we invoke convertToUnit(1458284400000) (i.e. 2016-03-18 00:00:00)
- * this method will return MINUTES.convert(1458284400000, MILLISECONDS)/10 = 2430474 tenMinutesSinceEpoch
- * @param millis epoch time in milliseconds
- * @return the number of whole buckets of this granularity since epoch
- */
- public long convertToUnit(long millis) {
- return unit.convert(millis, TimeUnit.MILLISECONDS) / size;
- }
-
- @Override
- public String toString() {
- return size + "-" + unit;
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(size, unit);
- }
-
- @Override
- public boolean equals(Object obj) {
- if (!(obj instanceof TimeGranularity)) {
- return false;
- }
- TimeGranularity other = (TimeGranularity) obj;
- return Objects.equals(other.size, this.size) && Objects.equals(other.unit, this.unit);
- }
-}
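
A short usage sketch mirroring the convertToUnit javadoc example above, assuming
java.util.concurrent.TimeUnit is imported:

    TimeGranularity tenMinutes = new TimeGranularity(10, TimeUnit.MINUTES);
    long buckets = tenMinutes.convertToUnit(1458284400000L); // 2430474 ten-minute buckets since epoch
    long millis = tenMinutes.toMillis(buckets);              // 1458284400000L, back to epoch millis
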
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java
deleted file mode 100644
index 60a254e..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
-
-import java.util.concurrent.TimeUnit;
-
-/** This class represents the time spec for thirdeye-hadoop jobs
- * @param columnName - columnName which represents time
- * @param timeGranularity - time granularity for the time column
- */
-public class TimeSpec {
- private static final TimeGranularity DEFAULT_TIME_GRANULARITY = new TimeGranularity(1, TimeUnit.HOURS);
- private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString();
-
- private String columnName;
- private TimeGranularity timeGranularity = DEFAULT_TIME_GRANULARITY;
- private String timeFormat = DEFAULT_TIME_FORMAT;
-
- public TimeSpec() {
- }
-
- public TimeSpec(String columnName, TimeGranularity timeGranularity, String timeFormat) {
- this.columnName = columnName;
- this.timeGranularity = timeGranularity;
- this.timeFormat = timeFormat;
- }
-
- @JsonProperty
- public String getColumnName() {
- return columnName;
- }
-
- @JsonProperty
- public TimeGranularity getTimeGranularity() {
- return timeGranularity;
- }
-
- @JsonProperty
- public String getTimeFormat() {
- return timeFormat;
- }
-
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java
deleted file mode 100644
index 55d7425..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Map;
-
-/**
- * This class manages config for dimension with topk
- * config defined on multiple metrics
- * @param dimensionName - The dimension of this topk config
- * @param topk - map of metric name to k value
- */
-public class TopKDimensionToMetricsSpec {
-
- String dimensionName;
- Map<String, Integer> topk;
-
- public TopKDimensionToMetricsSpec() {
-
- }
-
- public TopKDimensionToMetricsSpec(String dimensionName, Map<String, Integer> topk) {
- this.dimensionName = dimensionName;
- this.topk = topk;
- }
-
- public String getDimensionName() {
- return dimensionName;
- }
-
- public void setDimensionName(String dimensionName) {
- this.dimensionName = dimensionName;
- }
-
- public Map<String, Integer> getTopk() {
- return topk;
- }
-
- public void setTopk(Map<String, Integer> topk) {
- this.topk = topk;
- }
-
- public String toString() {
- return "{ dimensionName : " + dimensionName + ", topk : " + topk + " }";
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopkWhitelistSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopkWhitelistSpec.java
deleted file mode 100644
index 2861d40..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopkWhitelistSpec.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * Config class to define topk and whitelist
- *
- * @param threshold - dimension values which do not satisfy metric thresholds will be ignored.
- * The metric total contributed by a dimension will be compared with the metric total across all the records.
- *
- * @param topKDimensionToMetricsSpec - list of dimension and a map of metric to topk value for that dimension
- * Only top k values for the dimension will be picked, based on metric
- *
- * @param whitelist - values to whitelist for given dimension (dimension:whitelist values)
- *
- * @param nonWhitelistValue - value to be used for a dimension value which is not in the whitelist
- */
-public class TopkWhitelistSpec {
-
- Map<String, Double> threshold;
- List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec;
- Map<String, List<String>> whitelist;
- Map<String, String> nonWhitelistValue;
-
- public TopkWhitelistSpec() {
-
- }
-
- public Map<String, Double> getThreshold() {
- return threshold;
- }
-
- public void setThreshold(Map<String, Double> threshold) {
- this.threshold = threshold;
- }
-
- public List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpec() {
- return topKDimensionToMetricsSpec;
- }
-
- public void setTopKDimensionToMetricsSpec(List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec) {
- this.topKDimensionToMetricsSpec = topKDimensionToMetricsSpec;
- }
-
- public Map<String, List<String>> getWhitelist() {
- return whitelist;
- }
-
- public void setWhitelist(Map<String, List<String>> whitelist) {
- this.whitelist = whitelist;
- }
-
- public Map<String, String> getNonWhitelistValue() {
- return nonWhitelistValue;
- }
-
- public void setNonWhitelistValue(Map<String, String> nonWhitelistValue) {
- this.nonWhitelistValue = nonWhitelistValue;
- }
-
-}
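
A minimal sketch of building this spec programmatically, with hypothetical
dimension names (country, browser) and metric name (m1), assuming
java.util.Arrays and java.util.Collections are imported:

    // Keep the top 20 "country" values by metric m1, whitelist two browsers,
    // and map everything outside the browser whitelist to "unknown".
    TopkWhitelistSpec spec = new TopkWhitelistSpec();
    spec.setThreshold(Collections.singletonMap("m1", 0.01));
    spec.setTopKDimensionToMetricsSpec(Collections.singletonList(
        new TopKDimensionToMetricsSpec("country", Collections.singletonMap("m1", 20))));
    spec.setWhitelist(Collections.singletonMap("browser", Arrays.asList("chrome", "firefox")));
    spec.setNonWhitelistValue(Collections.singletonMap("browser", "unknown"));
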
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java
deleted file mode 100644
index a90a77a..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * This class contains the config needed by the derived column transformation phase
- * and the methods to obtain that config from the ThirdEyeConfig
- */
-public class DerivedColumnTransformationPhaseConfig {
- private List<String> dimensionNames;
- private List<DimensionType> dimensionTypes;
- private List<String> metricNames;
- private List<MetricType> metricTypes;
- private String timeColumnName;
- private Map<String, List<String>> whitelist;
- private Map<String, String> nonWhitelistValue;
-
-
- public DerivedColumnTransformationPhaseConfig() {
-
- }
-
- /**
- * @param dimensionNames
- * @param dimensionTypes
- * @param metricNames
- * @param metricTypes
- * @param timeColumnName
- * @param whitelist
- */
- public DerivedColumnTransformationPhaseConfig(List<String> dimensionNames, List<DimensionType> dimensionTypes,
- List<String> metricNames, List<MetricType> metricTypes, String timeColumnName,
- Map<String, List<String>> whitelist, Map<String, String> nonWhitelistValue) {
- super();
- this.dimensionNames = dimensionNames;
- this.dimensionTypes = dimensionTypes;
- this.metricNames = metricNames;
- this.metricTypes = metricTypes;
- this.timeColumnName = timeColumnName;
- this.whitelist = whitelist;
- this.nonWhitelistValue = nonWhitelistValue;
- }
-
- public List<String> getDimensionNames() {
- return dimensionNames;
- }
-
- public List<DimensionType> getDimensionTypes() {
- return dimensionTypes;
- }
-
- public List<String> getMetricNames() {
- return metricNames;
- }
-
- public List<MetricType> getMetricTypes() {
- return metricTypes;
- }
-
- public String getTimeColumnName() {
- return timeColumnName;
- }
-
- public Map<String, List<String>> getWhitelist() {
- return whitelist;
- }
-
- public Map<String, String> getNonWhitelistValue() {
- return nonWhitelistValue;
- }
-
- public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
-
- // metrics
- List<String> metricNames = new ArrayList<>(config.getMetrics().size());
- List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
- for (MetricSpec spec : config.getMetrics()) {
- metricNames.add(spec.getName());
- metricTypes.add(spec.getType());
- }
-
- // dimensions
- List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
- List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
- for (DimensionSpec spec : config.getDimensions()) {
- dimensionNames.add(spec.getName());
- dimensionTypes.add(spec.getDimensionType());
- }
-
- // time
- String timeColumnName = config.getTime().getColumnName();
-
- TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
- Map<String, List<String>> whitelist = new HashMap<>();
-
- // topkwhitelist
- if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
- whitelist.putAll(topKWhitelist.getWhitelist());
- }
-
- Map<String, String> nonWhitelistValueMap = new HashMap<>();
- if (topKWhitelist != null && topKWhitelist.getNonWhitelistValue() != null) {
- nonWhitelistValueMap.putAll(topKWhitelist.getNonWhitelistValue());
- }
-
- return new DerivedColumnTransformationPhaseConfig(dimensionNames, dimensionTypes, metricNames, metricTypes,
- timeColumnName, whitelist, nonWhitelistValueMap);
- }
-
-}
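
A brief usage sketch, assuming a ThirdEyeConfig named thirdeyeConfig was built
elsewhere (e.g. via ThirdEyeConfig.fromProperties):

    DerivedColumnTransformationPhaseConfig phaseConfig =
        DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
    Map<String, List<String>> whitelist = phaseConfig.getWhitelist();      // empty map when nothing is whitelisted
    Map<String, String> nonWhitelist = phaseConfig.getNonWhitelistValue(); // replacement values per dimension
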
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java
deleted file mode 100644
index 151a853..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-/**
- * This class contains the properties to be set for topk column transformation phase
- */
-public enum DerivedColumnTransformationPhaseConstants {
- DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH("derived.column.transformation.phase.input.path"),
- DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH("derived.column.transformation.phase.topk.path"),
- DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH("derived.column.transformation.phase.output.path"),
- DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA("derived.column.transformation.phase.output.schema"),
- DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG("derived.column.transformation.phase.thirdeye.config");
-
- String name;
-
- DerivedColumnTransformationPhaseConstants(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java
deleted file mode 100644
index 88dbe2c..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java
+++ /dev/null
@@ -1,403 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH;
-
-import java.io.DataInput;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.topk.TopKDimensionValues;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-import org.apache.avro.Schema;
-import org.apache.avro.SchemaBuilder;
-import org.apache.avro.SchemaBuilder.BaseFieldTypeBuilder;
-import org.apache.avro.SchemaBuilder.FieldAssembler;
-import org.apache.avro.SchemaBuilder.RecordBuilder;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.avro.mapreduce.AvroMultipleOutputs;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-/**
- * This phase adds a new column for every column that has topk config.
- * The new column is called "column_topk" (containing only topk values plus any whitelist values),
- * while "column" contains all values with the whitelist applied.
- * For all non-topk values, the dimension value is replaced by "other".
- * For all non-whitelist values, the dimension value is replaced by the defaultOtherValue specified in DimensionType.
- * This default other value can be configured using a property like thirdeye.nonwhitelist.value.dimension.d1=x.
- */
-public class DerivedColumnTransformationPhaseJob extends Configured {
- private static final Logger LOGGER = LoggerFactory.getLogger(DerivedColumnTransformationPhaseJob.class);
-
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- private String name;
- private Properties props;
-
- /**
- * @param name the job name
- * @param props the job properties
- */
- public DerivedColumnTransformationPhaseJob(String name, Properties props) {
- super(new Configuration());
- this.name = name;
- this.props = props;
- }
-
- public static class DerivedColumnTransformationPhaseMapper
- extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
-
- private Schema outputSchema;
- private ThirdEyeConfig thirdeyeConfig;
- private DerivedColumnTransformationPhaseConfig config;
- private List<String> dimensionsNames;
- private List<DimensionType> dimensionsTypes;
- private List<String> metricNames;
- private List<MetricType> metricTypes;
- private TopKDimensionValues topKDimensionValues;
- private Map<String, Set<String>> topKDimensionsMap;
- private Map<String, List<String>> whitelist;
- private Map<String, String> nonWhitelistValueMap;
- private String timeColumnName;
-
- private AvroMultipleOutputs avroMultipleOutputs;
- String inputFileName;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
- LOGGER.info("DerivedColumnTransformationPhaseJob.DerivedColumnTransformationPhaseMapper.setup()");
- Configuration configuration = context.getConfiguration();
- FileSystem fs = FileSystem.get(configuration);
-
- FileSplit fileSplit = (FileSplit) context.getInputSplit();
- inputFileName = fileSplit.getPath().getName();
- inputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(ThirdEyeConstants.AVRO_SUFFIX));
- LOGGER.info("split name:" + inputFileName);
-
- thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- config = DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
- dimensionsNames = config.getDimensionNames();
- dimensionsTypes = config.getDimensionTypes();
- metricNames = config.getMetricNames();
- metricTypes = config.getMetricTypes();
- timeColumnName = config.getTimeColumnName();
- whitelist = config.getWhitelist();
- nonWhitelistValueMap = config.getNonWhitelistValue();
-
- outputSchema = new Schema.Parser().parse(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString()));
-
- Path topKPath = new Path(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString())
- + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE);
- topKDimensionValues = new TopKDimensionValues();
- if (fs.exists(topKPath)) {
- FSDataInputStream topkValuesStream = fs.open(topKPath);
- topKDimensionValues = OBJECT_MAPPER.readValue((DataInput) topkValuesStream, TopKDimensionValues.class);
- topkValuesStream.close();
- }
- topKDimensionsMap = topKDimensionValues.getTopKDimensions();
-
- avroMultipleOutputs = new AvroMultipleOutputs(context);
- }
-
-
- @Override
- public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
- throws IOException, InterruptedException {
-
- // input record
- GenericRecord inputRecord = key.datum();
-
- // output record
- GenericRecord outputRecord = new Record(outputSchema);
-
- // dimensions
- for (int i = 0; i < dimensionsNames.size(); i++) {
-
- String dimensionName = dimensionsNames.get(i);
- DimensionType dimensionType = dimensionsTypes.get(i);
- Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
- String dimensionValueStr = String.valueOf(dimensionValue);
-
-
- // add original dimension value with whitelist applied
- Object whitelistDimensionValue = dimensionValue;
- if (whitelist != null) {
- List<String> whitelistDimensions = whitelist.get(dimensionName);
- if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
- // whitelist config exists for this dimension but value not present in whitelist
- if (!whitelistDimensions.contains(dimensionValueStr)) {
- whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
- }
- }
- }
- outputRecord.put(dimensionName, whitelistDimensionValue);
-
- // add column for topk, if topk config exists for that column, plus any whitelist values
- if (topKDimensionsMap.containsKey(dimensionName)) {
- Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
- // if topk config exists for that dimension
- if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
- String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
- Object topkDimensionValue = dimensionValue;
- // topk config exists for this dimension, but value not present in topk or whitelist
- if (!topKDimensionValues.contains(dimensionValueStr) &&
- (whitelist == null || whitelist.get(dimensionName) == null
- || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
- topkDimensionValue = ThirdEyeConstants.OTHER;
- }
- outputRecord.put(topkDimensionName, String.valueOf(topkDimensionValue));
- }
- }
- }
-
- // metrics
- for (int i = 0; i < metricNames.size(); i++) {
- String metricName = metricNames.get(i);
- MetricType metricType = metricTypes.get(i);
- outputRecord.put(metricName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName, metricType));
- }
-
- // time
- outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));
-
- AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
- avroMultipleOutputs.write(outputKey, NullWritable.get(), inputFileName);
- }
-
- @Override
- public void cleanup(Context context) throws IOException, InterruptedException {
- avroMultipleOutputs.close();
- }
-
-
- }
-
- public Job run() throws Exception {
- Job job = Job.getInstance(getConf());
- job.setJobName(name);
- job.setJarByClass(DerivedColumnTransformationPhaseJob.class);
-
- Configuration configuration = job.getConfiguration();
- FileSystem fs = FileSystem.get(configuration);
-
- // Input Path
- String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
- LOGGER.info("Input path dir: " + inputPathDir);
- for (String inputPath : inputPathDir.split(",")) {
- LOGGER.info("Adding input:" + inputPath);
- Path input = new Path(inputPath);
- FileInputFormat.addInputPath(job, input);
- }
-
- // Topk path
- String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
- LOGGER.info("Topk path : " + topkPath);
-
- // Output path
- Path outputPath = new Path(getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
- LOGGER.info("Output path dir: " + outputPath.toString());
- if (fs.exists(outputPath)) {
- fs.delete(outputPath, true);
- }
- FileOutputFormat.setOutputPath(job, outputPath);
-
- // Schema
- Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
- LOGGER.info("Schema : {}", avroSchema.toString(true));
-
- // ThirdEyeConfig
- String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
- String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
- ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
- job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
- OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
- LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
-
- // New schema
- Schema outputSchema = newSchema(thirdeyeConfig);
- job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(), outputSchema.toString());
-
- // Map config
- job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
- job.setInputFormatClass(AvroKeyInputFormat.class);
- job.setMapOutputKeyClass(AvroKey.class);
- job.setMapOutputValueClass(NullWritable.class);
- AvroJob.setOutputKeySchema(job, outputSchema);
- LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
- AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);
-
- job.setNumReduceTasks(0);
-
- job.waitForCompletion(true);
-
- return job;
- }
-
-
- public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
- Schema outputSchema = null;
-
- Set<String> topKTransformDimensionSet = new HashSet<>();
- TopkWhitelistSpec topkWhitelist = thirdeyeConfig.getTopKWhitelist();
-
- // gather topk columns
- if (topkWhitelist != null) {
- List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecs = topkWhitelist.getTopKDimensionToMetricsSpec();
- if (topKDimensionToMetricsSpecs != null) {
- for (TopKDimensionToMetricsSpec topKDimensionToMetricsSpec : topKDimensionToMetricsSpecs) {
- topKTransformDimensionSet.add(topKDimensionToMetricsSpec.getDimensionName());
- }
- }
- }
- RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(thirdeyeConfig.getCollection());
- FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
-
- // add new column for topk columns
- for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
- String dimensionName = dimensionSpec.getName();
- DimensionType dimensionType = dimensionSpec.getDimensionType();
- BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(dimensionName).type().nullable();
-
- switch (dimensionType) {
- case DOUBLE:
- fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
- break;
- case FLOAT:
- fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
- break;
- case INT:
- case SHORT:
- fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
- break;
- case LONG:
- fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
- break;
- case STRING:
- fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
- break;
- default:
- throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
- }
- if (topKTransformDimensionSet.contains(dimensionName)) {
- fieldAssembler = fieldAssembler.name(dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX).type().nullable().stringType().noDefault();
- }
- }
-
- for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
- String metric = metricSpec.getName();
- MetricType metricType = metricSpec.getType();
- BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(metric).type().nullable();
-
- switch (metricType) {
- case SHORT:
- case INT:
- fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
- break;
- case FLOAT:
- fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
- break;
- case DOUBLE:
- fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
- break;
- case LONG:
- default:
- fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
- }
- }
-
- String timeColumnName = thirdeyeConfig.getTime().getColumnName();
- fieldAssembler = fieldAssembler.name(timeColumnName).type().longType().noDefault();
-
- outputSchema = fieldAssembler.endRecord();
- LOGGER.info("New schema {}", outputSchema.toString(true));
-
- return outputSchema;
- }
-
- private String getAndSetConfiguration(Configuration configuration,
- DerivedColumnTransformationPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
- throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 1) {
- throw new IllegalArgumentException("usage: config.properties");
- }
-
- Properties props = new Properties();
- props.load(new FileInputStream(args[0]));
- DerivedColumnTransformationPhaseJob job = new DerivedColumnTransformationPhaseJob("derived_column_transformation_job", props);
- job.run();
- }
-
-}
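
To make the mapper's behavior concrete, a hypothetical before/after record,
assuming topk is configured on "country" with top values {us, in} and no
whitelist:

    input : {country: "fr", views: 12, hoursSinceEpoch: 405079}
    output: {country: "fr", country_topk: "other", views: 12, hoursSinceEpoch: 405079}

"country" keeps its raw value, while the derived "country_topk" column collapses
values outside the top k (and not whitelisted) to ThirdEyeConstants.OTHER.
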
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinConfigUDF.java
deleted file mode 100644
index f13e579..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import org.apache.hadoop.mapreduce.Job;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultJoinConfigUDF implements JoinConfigUDF {
- private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJoinConfigUDF.class);
-
- @Override
- public void setJoinConfig(Job job) {
-
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java
deleted file mode 100644
index 461f16e..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.Map;
-
-import org.apache.avro.generic.GenericRecord;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultJoinKeyExtractor implements JoinKeyExtractor {
- private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJoinKeyExtractor.class);
-
- private Map<String, String> joinKeyMap;
- private String defaultJoinKey;
-
- public DefaultJoinKeyExtractor(Map<String, String> params) {
- this.joinKeyMap = params;
- this.defaultJoinKey = params.get("defaultJoinKey");
- }
-
- @Override
- public String extractJoinKey(String sourceName, GenericRecord record) {
-
- String joinKey = defaultJoinKey;
- if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
- joinKey = joinKeyMap.get(sourceName);
- }
- String ret = "INVALID";
- if (joinKey != null) {
- Object object = record.get(joinKey);
- if (object != null) {
- ret = object.toString();
- }
- }
- LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
- return ret;
- }
-
-}
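
A small usage sketch with hypothetical source and column names, assuming
java.util.HashMap is imported:

    // Join "impressions" records on memberId; all other sources fall back to "id".
    Map<String, String> params = new HashMap<>();
    params.put("defaultJoinKey", "id");
    params.put("impressions", "memberId");
    JoinKeyExtractor extractor = new DefaultJoinKeyExtractor(params);
    // extractJoinKey("impressions", record) reads record.get("memberId");
    // a source without a mapping uses "id"; a missing value yields "INVALID".
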
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java
deleted file mode 100644
index 0ca9266..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.codehaus.jackson.JsonParseException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
-
-import org.codehaus.jackson.type.TypeReference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DelegatingAvroKeyInputFormat<T> extends AvroKeyInputFormat<T> {
- private static final Logger LOGGER = LoggerFactory.getLogger(DelegatingAvroKeyInputFormat.class);
- private static final TypeReference<Map<String, String>> MAP_STRING_STRING_TYPE =
-     new TypeReference<Map<String, String>>() {
- };
-
- public org.apache.hadoop.mapreduce.RecordReader<org.apache.avro.mapred.AvroKey<T>, NullWritable> createRecordReader(
- InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
- LOGGER.info("DelegatingAvroKeyInputFormat.createRecordReader() for split:{}", split);
- FileSplit fileSplit = (FileSplit) split;
- Configuration configuration = context.getConfiguration();
- String sourceName = getSourceNameFromPath(fileSplit, configuration);
- LOGGER.info("Source Name for path {} : {}", fileSplit.getPath(), sourceName);
- Map<String, String> schemaJSONMapping = new ObjectMapper()
- .readValue(configuration.get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
-
- LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
-
- String sourceSchemaJSON = schemaJSONMapping.get(sourceName);
-
- Schema schema = new Schema.Parser().parse(sourceSchemaJSON);
- return new AvroKeyRecordReader<T>(schema);
- }
-
- public static String getSourceNameFromPath(FileSplit fileSplit, Configuration configuration)
- throws IOException, JsonParseException, JsonMappingException {
- String content = configuration.get("schema.path.mapping");
- Map<String, String> schemaPathMapping =
- new ObjectMapper().readValue(content, MAP_STRING_STRING_TYPE);
- LOGGER.info("Schema Path Mapping: {}", schemaPathMapping);
-
- String sourceName = null;
- for (String path : schemaPathMapping.keySet()) {
- if (fileSplit.getPath().toString().indexOf(path) > -1) {
- sourceName = schemaPathMapping.get(path);
- break;
- }
- }
- return sourceName;
- }
-}
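
The two job configuration entries this format reads are JSON maps; a
hypothetical example:

    schema.path.mapping = {"/data/impressions": "impressions", "/data/clicks": "clicks"}
    schema.json.mapping = {"impressions": "<impressions Avro schema JSON>", "clicks": "<clicks Avro schema JSON>"}

A split whose path contains /data/clicks resolves to source "clicks" and is read
with the clicks schema.
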
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDF.java
deleted file mode 100644
index e4312fe..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDF.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.thirdeye.hadoop.join.GenericJoinUDFConfig.Field;
-
-public class GenericJoinUDF implements JoinUDF {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(GenericJoinUDF.class);
- private GenericJoinUDFConfig config;
- private Schema outputSchema;
- private List<Field> fields;
-
- public GenericJoinUDF(Map<String, String> params) {
- LOGGER.info("Initializing GenericJoinUDF with params:" + params);
- this.config = new GenericJoinUDFConfig(params);
- fields = config.getFields();
- }
-
- @Override
- public void init(Schema outputSchema) {
- this.outputSchema = outputSchema;
- }
-
- /**
- * Trivial implementation of a generic join udf. Assumes the data type is the
- * same in source and output.
- */
- @Override
- public List<GenericRecord> performJoin(Object joinKeyVal,
- Map<String, List<GenericRecord>> joinInput) {
-
- List<GenericRecord> outputRecords = new ArrayList<GenericRecord>();
- GenericRecord outputRecord = new GenericData.Record(outputSchema);
- for (Field field : fields) {
- Object value = null;
- // try to find the field in one of the source events, break out as soon as
- // we find a non null value
- for (String source : field.sourceEvents) {
- List<GenericRecord> list = joinInput.get(source);
- if (list != null && list.size() >= 1) {
- for (GenericRecord record : list) {
- value = record.get(field.name);
- if (value != null) {
- break;
- }
- }
- }
- if (value != null) {
- break;
- }
- }
- if (value != null) {
- outputRecord.put(field.name, value);
- }
- }
- outputRecords.add(outputRecord);
- return outputRecords;
- }
-
-}
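
To make the field-resolution rule above concrete: for each output field, the UDF scans the configured sources in order and takes the first non-null value it finds. A hedged sketch of that scan using plain collections instead of Avro records (source names and values are hypothetical):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class FirstNonNullSketch {
      public static void main(String[] args) {
        // Hypothetical candidate values for one output field, per source.
        Map<String, List<Object>> candidatesBySource = new HashMap<>();
        candidatesBySource.put("sourceA", Arrays.asList(null, null));
        candidatesBySource.put("sourceB", Arrays.asList(null, "value-from-B"));

        Object value = null;
        for (String source : Arrays.asList("sourceA", "sourceB")) {
          List<Object> candidates = candidatesBySource.get(source);
          if (candidates != null) {
            for (Object candidate : candidates) {
              if (candidate != null) {
                value = candidate; // first non-null value wins
                break;
              }
            }
          }
          if (value != null) {
            break; // stop at the first source that yielded a value
          }
        }
        System.out.println(value); // prints "value-from-B"
      }
    }
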
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDFConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDFConfig.java
deleted file mode 100644
index 0d9b6d4..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDFConfig.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.Schema.Type;
-
-public class GenericJoinUDFConfig {
-
- List<Field> fields;
-
- public GenericJoinUDFConfig(Map<String, String> params) {
- fields = new ArrayList<Field>();
- String fieldNamesString = params.get("field.names");
- String[] split = fieldNamesString.split(",");
- for (String fieldName : split) {
- Field field = new Field();
- field.name = fieldName;
- String type = params.get(fieldName + ".type");
- if (type != null) {
- field.type = Schema.Type.valueOf(type.toUpperCase());
- }
- field.sourceEvents = new ArrayList<String>();
- String[] fieldSources = params.get(fieldName + ".sources").split(",");
- for (String fieldSource : fieldSources) {
- field.sourceEvents.add(fieldSource.trim());
- }
- fields.add(field);
- }
- }
-
- public List<Field> getFields() {
- return fields;
- }
-
- public void setFields(List<Field> fields) {
- this.fields = fields;
- }
-
- /*
-   * For now, only the field name and source names are supported. Data type
-   * conversion and transform functions would be nice to support in the future.
- */
- public static class Field {
- String name;
- List<String> sourceEvents;
- Schema.Type type;
-    List<String> transformFunc;
-
- public String getName() {
- return name;
- }
-
- public void setName(String name) {
- this.name = name;
- }
-
- public Type getType() {
- return type;
- }
-
- public void setType(Type type) {
- this.type = type;
- }
-
- public List<String> getSourceEvents() {
- return sourceEvents;
- }
-
- public void setSourceEvents(List<String> sourceEvents) {
- this.sourceEvents = sourceEvents;
- }
-
-    public List<String> getTransformFunc() {
-      return transformFunc;
-    }
-
-    public void setTransformFunc(List<String> transformFunc) {
-      this.transformFunc = transformFunc;
-    }
- }
-}
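
The constructor above derives everything from a flat params map. A hedged example of a map it would accept, with made-up field and source names:

    import java.util.HashMap;
    import java.util.Map;

    public class JoinUDFParamsSketch {
      public static void main(String[] args) {
        Map<String, String> params = new HashMap<>();
        // Comma-separated list of output field names.
        params.put("field.names", "memberId,country");
        // Optional Avro type per field, parsed via Schema.Type.valueOf(type.toUpperCase()).
        params.put("memberId.type", "long");
        // Sources searched, in order, for each field's value.
        params.put("memberId.sources", "pageviews,clicks");
        params.put("country.sources", "pageviews");
        // new GenericJoinUDFConfig(params) would yield two Field entries.
      }
    }
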
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinConfigUDF.java
deleted file mode 100644
index 5ece800..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Simple interface to customize the join job configuration
- */
-public interface JoinConfigUDF {
-
- /**
-   * @param job the join job whose configuration is to be customized
- */
- void setJoinConfig(Job job);
-}
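
A hedged sketch of an implementation; the Hadoop property tweaked here is illustrative, not something the ThirdEye pipeline requires:

    import org.apache.hadoop.mapreduce.Job;

    public class ExampleJoinConfigUDF implements JoinConfigUDF {
      @Override
      public void setJoinConfig(Job job) {
        // Illustrative knob: compress map output for the shuffle phase.
        job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
      }
    }

Note that JoinPhaseJob instantiates this class through its no-arg constructor, so an implementation only needs the default constructor.
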
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinKeyExtractor.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinKeyExtractor.java
deleted file mode 100644
index 7a6c0f1..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinKeyExtractor.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import org.apache.avro.generic.GenericRecord;
-
-/**
- * Simple interface to extract the joinKey from a Generic Record
- */
-public interface JoinKeyExtractor {
- /**
- * @param sourceName name of the source
-   * @param record record from which the join key is extracted; the join key
-   *          value is expected to be a string
-   * @return the join key extracted from the record
- */
- String extractJoinKey(String sourceName, GenericRecord record);
-}
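
A hedged sketch of an implementation; the key field name "memberId" is hypothetical. JoinPhaseJob instantiates extractors reflectively through a constructor taking a Map of per-source config, so that signature is required:

    import java.util.Map;

    import org.apache.avro.generic.GenericRecord;

    public class SimpleJoinKeyExtractor implements JoinKeyExtractor {
      private final Map<String, String> params;

      // JoinPhaseJob calls getConstructor(Map.class), so this constructor is required.
      public SimpleJoinKeyExtractor(Map<String, String> params) {
        this.params = params;
      }

      @Override
      public String extractJoinKey(String sourceName, GenericRecord record) {
        Object key = record.get("memberId"); // hypothetical join key field
        // GenericJoinMapper drops records whose join key equals "INVALID".
        return key == null ? "INVALID" : key.toString();
      }
    }
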
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseConstants.java
deleted file mode 100644
index 52a0ef4..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseConstants.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-public enum JoinPhaseConstants {
-  // schema and input are configured per source; the actual property accessed is {source}.join.input.path
- JOIN_INPUT_SCHEMA("join.input.schema"), // one schema for each source
- JOIN_INPUT_PATH("join.input.path"), // one input for each source
- JOIN_OUTPUT_PATH("join.output.path"),
- JOIN_OUTPUT_SCHEMA("join.output.schema"),
- JOIN_SOURCE_NAMES("join.source.names"), // comma separated list of sources
- JOIN_CONFIG_UDF_CLASS("join.config.udf.class"),
- JOIN_UDF_CLASS("join.udf.class"),
- JOIN_KEY_EXTRACTOR_CLASS("join.key.extractor.class"),
- JOIN_KEY_EXTRACTOR_CONFIG("join.key.extractor.config"), // one for each source
- JOIN_UDF_CONFIG("join.udf.config"); // one for each source
-
- String name;
-
- JoinPhaseConstants(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseJob.java
deleted file mode 100644
index b13d1ec..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseJob.java
+++ /dev/null
@@ -1,394 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import static com.linkedin.thirdeye.hadoop.join.JoinPhaseConstants.*;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.StringWriter;
-import java.lang.reflect.Constructor;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.CounterGroup;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.Lists;
-
-/**
- * This is a generic join job that can be used to prepare the data for Third
- * Eye. Many teams just need a way to join multiple data sets into one.
- * Currently they do this with a Pig script, which is highly inefficient since
- * it performs a pairwise join. The idea is as follows: there are N named
- * sources, and there is a join key common across all of them. <br/>
- * S1: join key s1_key <br/>
- * S2: join key s2_key <br/>
- * ... <br/>
- * SN: join key sn_key<br/>
- */
-public class JoinPhaseJob extends Configured {
- private static final Logger LOGGER = LoggerFactory.getLogger(JoinPhaseJob.class);
-
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
- private String name;
- private Properties props;
-
- public JoinPhaseJob(String name, Properties props) {
- super(new Configuration());
- this.name = name;
- this.props = props;
- }
-
- public static class GenericJoinMapper
- extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
- String sourceName;
- JoinKeyExtractor joinKeyExtractor;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
-
- LOGGER.info("GenericAvroJoinJob.GenericJoinMapper.setup()");
- FileSplit fileSplit = (FileSplit) context.getInputSplit();
- LOGGER.info("split name:" + fileSplit.toString());
- Configuration configuration = context.getConfiguration();
-
- try {
- sourceName = DelegatingAvroKeyInputFormat.getSourceNameFromPath(fileSplit, configuration);
- LOGGER.info("Input: {} belongs to Source:{}", fileSplit, sourceName);
- String joinKeyExtractorClass = configuration.get(JOIN_KEY_EXTRACTOR_CLASS.toString());
-
- Map<String, String> params = new HashMap<>();
- List<String> sourceNames = Lists.newArrayList(configuration.get(JOIN_SOURCE_NAMES.toString()).split(","));
- for (String sourceName : sourceNames) {
- String joinKeyExtractorConfig = configuration.get(sourceName + "." + JOIN_KEY_EXTRACTOR_CONFIG.toString());
- if (StringUtils.isNotBlank(joinKeyExtractorConfig)) {
- params.put(sourceName, joinKeyExtractorConfig);
- }
- }
- LOGGER.info("Initializing JoinKeyExtractorClass:{} with params:{}", joinKeyExtractorClass, params);
- Constructor<?> constructor = Class.forName(joinKeyExtractorClass).getConstructor(Map.class);
- joinKeyExtractor = (JoinKeyExtractor) constructor.newInstance(params);
- } catch (Exception e) {
- throw new IOException(e);
- }
-
- }
-
- @Override
- public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
- throws IOException, InterruptedException {
- GenericRecord record = recordWrapper.datum();
- MapOutputValue mapOutputValue = new MapOutputValue(record.getSchema().getName(), record);
- String joinKeyValue = joinKeyExtractor.extractJoinKey(sourceName, record);
- LOGGER.info("Join Key:{}", joinKeyValue);
-
- if (!"INVALID".equals(joinKeyValue)) {
- context.write(new BytesWritable(joinKeyValue.toString().getBytes()),
- new BytesWritable(mapOutputValue.toBytes()));
- }
- }
-
- }
-
- public static class GenericJoinReducer
- extends Reducer<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> {
-
- String statOutputDir;
- private FileSystem fileSystem;
- private static TypeReference MAP_STRING_STRING_TYPE = new TypeReference<Map<String, String>>() {
- };
- private Map<String, Schema> schemaMap = new HashMap<String, Schema>();
- private JoinUDF joinUDF;
- private Map<String, AtomicInteger> countersMap = new HashMap<String, AtomicInteger>();
- private List<String> sourceNames;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
- Configuration configuration = context.getConfiguration();
- fileSystem = FileSystem.get(configuration);
-
- try {
-
- Map<String, String> schemaJSONMapping = new ObjectMapper().readValue(
- context.getConfiguration().get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
-
- LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
- for (String sourceName : schemaJSONMapping.keySet()) {
- Schema schema = new Schema.Parser().parse(schemaJSONMapping.get(sourceName));
- schemaMap.put(sourceName, schema);
- }
- sourceNames = Lists.newArrayList(configuration.get(JOIN_SOURCE_NAMES.toString()).split(","));
- String joinUDFClass = configuration.get(JOIN_UDF_CLASS.toString());
- Map<String, String> params = new HashMap<>();
- for (String sourceName : sourceNames) {
- String joinUdfConfig = configuration.get(sourceName + "." + JOIN_UDF_CONFIG.toString());
- if (StringUtils.isNotBlank(joinUdfConfig)) {
- params.put(sourceName, joinUdfConfig);
- }
- }
-
- Constructor<?> constructor = Class.forName(joinUDFClass).getConstructor(Map.class);
- LOGGER.info("Initializing JoinUDFClass:{} with params:{}", joinUDFClass, params);
- joinUDF = (JoinUDF) constructor.newInstance(params);
- String outputSchemaPath = configuration.get(JOIN_OUTPUT_SCHEMA.toString());
- // Avro schema
- Schema.Parser parser = new Schema.Parser();
- Schema outputSchema = parser.parse(fileSystem.open(new Path(outputSchemaPath)));
- LOGGER.info("Setting outputschema:{}", outputSchema);
- joinUDF.init(outputSchema);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- @Override
- public void reduce(BytesWritable joinKeyWritable, Iterable<BytesWritable> recordBytesWritable,
- Context context) throws IOException, InterruptedException {
- Map<String, List<GenericRecord>> joinInput = new HashMap<String, List<GenericRecord>>();
- for (BytesWritable writable : recordBytesWritable) {
-
- byte[] bytes = writable.copyBytes();
- MapOutputValue mapOutputValue = MapOutputValue.fromBytes(bytes, schemaMap);
- String schemaName = mapOutputValue.getSchemaName();
- if (!joinInput.containsKey(schemaName)) {
- joinInput.put(schemaName, new ArrayList<GenericRecord>());
- }
- joinInput.get(schemaName).add(mapOutputValue.getRecord());
- }
-
- int[] exists = new int[sourceNames.size()];
- for (int i = 0; i < sourceNames.size(); i++) {
- String source = sourceNames.get(i);
- if (joinInput.containsKey(source)) {
- exists[i] = 1;
- } else {
- exists[i] = 0;
- }
- }
- String counterName = Arrays.toString(exists);
- if (!countersMap.containsKey(counterName)) {
- countersMap.put(counterName, new AtomicInteger(0));
- }
- countersMap.get(counterName).incrementAndGet();
- // invoke the udf and pass in the join data
- List<GenericRecord> outputRecords =
- joinUDF.performJoin(new String(joinKeyWritable.copyBytes()), joinInput);
- if (outputRecords != null) {
- for (GenericRecord outputRecord : outputRecords) {
- context.write(new AvroKey<GenericRecord>(outputRecord), NullWritable.get());
- }
- }
- }
-
- protected void cleanup(Context context) throws IOException, InterruptedException {
- for (String counterName : countersMap.keySet()) {
- context.getCounter("DynamicCounter", counterName)
- .increment(countersMap.get(counterName).get());
- }
- }
- }
-
- public Job run() throws Exception {
- Job job = Job.getInstance(getConf());
- Configuration conf = job.getConfiguration();
- job.setJobName(name);
- job.setJarByClass(JoinPhaseJob.class);
-
- FileSystem fs = FileSystem.get(conf);
-
- String outputSchemaPath = getAndSetConfiguration(conf, JOIN_OUTPUT_SCHEMA);
- Schema.Parser parser = new Schema.Parser();
- Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
- LOGGER.info("{}", outputSchema);
-
- // Set custom config like adding distributed caches
- String joinConfigUDFClass = getAndSetConfiguration(conf, JoinPhaseConstants.JOIN_CONFIG_UDF_CLASS);
-    LOGGER.info("Initializing JoinConfigUDFClass:{}", joinConfigUDFClass);
- Constructor<?> constructor = Class.forName(joinConfigUDFClass).getConstructor();
- JoinConfigUDF joinConfigUDF = (JoinConfigUDF) constructor.newInstance();
- joinConfigUDF.setJoinConfig(job);
- getAndSetConfiguration(conf, JOIN_KEY_EXTRACTOR_CLASS);
- getAndSetConfiguration(conf, JOIN_UDF_CLASS);
-
- List<String> sourceNames = Lists.newArrayList(
- getAndSetConfiguration(conf, JoinPhaseConstants.JOIN_SOURCE_NAMES).split(","));
-
- // Map config
- job.setMapperClass(GenericJoinMapper.class);
- // AvroJob.setInputKeySchema(job, unionSchema);
- job.setInputFormatClass(DelegatingAvroKeyInputFormat.class);
- job.setMapOutputKeyClass(BytesWritable.class);
- job.setMapOutputValueClass(BytesWritable.class);
-
- // Reduce config
- job.setReducerClass(GenericJoinReducer.class);
- AvroJob.setOutputKeySchema(job, outputSchema);
- job.setOutputFormatClass(AvroKeyOutputFormat.class);
- job.setOutputKeyClass(AvroKey.class);
- job.setOutputValueClass(NullWritable.class);
-
- String numReducers = props.getProperty("num.reducers");
- if (numReducers != null) {
- job.setNumReduceTasks(Integer.parseInt(numReducers));
- } else {
- job.setNumReduceTasks(10);
- }
- LOGGER.info("Setting number of reducers : " + job.getNumReduceTasks());
- Map<String, String> schemaMap = new HashMap<String, String>();
- Map<String, String> schemaPathMapping = new HashMap<String, String>();
-
- for (String sourceName : sourceNames) {
- // load schema for each source
- LOGGER.info("Loading Schema for {}", sourceName);
-
- FSDataInputStream schemaStream =
- fs.open(new Path(getAndCheck(sourceName + "." + JOIN_INPUT_SCHEMA.toString())));
- Schema schema = new Schema.Parser().parse(schemaStream);
- schemaMap.put(sourceName, schema.toString());
- LOGGER.info("Schema for {}: \n{}", sourceName, schema);
-
- // configure input data for each source
- String inputPathDir = getAndCheck(sourceName + "." + JOIN_INPUT_PATH.toString());
- LOGGER.info("Input path dir for " + sourceName + ": " + inputPathDir);
- for (String inputPath : inputPathDir.split(",")) {
- Path input = new Path(inputPath);
- FileStatus[] listFiles = fs.listStatus(input);
- boolean isNested = false;
- for (FileStatus fileStatus : listFiles) {
- if (fileStatus.isDirectory()) {
- isNested = true;
- Path path = fileStatus.getPath();
- LOGGER.info("Adding input:" + path);
- FileInputFormat.addInputPath(job, path);
- schemaPathMapping.put(path.toString(), sourceName);
- }
- }
- if (!isNested) {
- LOGGER.info("Adding input:" + inputPath);
- FileInputFormat.addInputPath(job, input);
- schemaPathMapping.put(input.toString(), sourceName);
- }
- }
- }
- StringWriter temp = new StringWriter();
- OBJECT_MAPPER.writeValue(temp, schemaPathMapping);
- job.getConfiguration().set("schema.path.mapping", temp.toString());
-
- temp = new StringWriter();
- OBJECT_MAPPER.writeValue(temp, schemaMap);
- job.getConfiguration().set("schema.json.mapping", temp.toString());
-
- Path outputPath = new Path(getAndCheck(JOIN_OUTPUT_PATH.toString()));
- if (fs.exists(outputPath)) {
- fs.delete(outputPath, true);
- }
- FileOutputFormat.setOutputPath(job, new Path(getAndCheck(JOIN_OUTPUT_PATH.toString())));
-
- for (Object key : props.keySet()) {
- conf.set(key.toString(), props.getProperty(key.toString()));
- }
-
- job.waitForCompletion(true);
-
- dumpSummary(job, sourceNames);
-
- return job;
- }
-
- private void dumpSummary(Job job, List<String> sourceNames) throws IOException {
- System.out.println("Join Input Matrix.");
- CounterGroup group = job.getCounters().getGroup("DynamicCounter");
- for (String source : sourceNames) {
- System.out.print(String.format("%25s\t", source));
-    }
-    System.out.println();
- if (group != null) {
- Iterator<Counter> iterator = group.iterator();
- while (iterator.hasNext()) {
- Counter counter = iterator.next();
- String displayName = counter.getDisplayName();
-        String[] split = displayName.replace("[", "").replace("]", "").split(",");
- for (String str : split) {
- if (str.trim().equals("1")) {
- System.out.print(String.format("%25s\t", "1"));
- } else {
- System.out.print(String.format("%25s\t", "-"));
- }
-        }
-        System.out.println();
- }
- }
- }
-
- private String getAndSetConfiguration(Configuration configuration,
- JoinPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
-      throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 1) {
- throw new IllegalArgumentException("usage: config.properties");
- }
-
- Properties props = new Properties();
- props.load(new FileInputStream(args[0]));
-
- JoinPhaseJob job = new JoinPhaseJob("aggregate_avro_job", props);
- job.run();
- }
-
-}
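
Pulling the property names together, a hedged example of a config.properties for a two-source run of this job; all paths and source names are hypothetical:

    # sources participating in the join
    join.source.names=pageviews,clicks
    # per-source schema and input path, prefixed with the source name
    pageviews.join.input.schema=/schemas/pageviews.avsc
    pageviews.join.input.path=/data/pageviews/2018-01-01
    clicks.join.input.schema=/schemas/clicks.avsc
    clicks.join.input.path=/data/clicks/2018-01-01
    # join output
    join.output.path=/output/joined/2018-01-01
    join.output.schema=/schemas/joined.avsc
    # pluggable classes
    join.config.udf.class=com.linkedin.thirdeye.hadoop.join.DefaultJoinConfigUDF
    join.udf.class=com.linkedin.thirdeye.hadoop.join.GenericJoinUDF
    join.key.extractor.class=com.linkedin.thirdeye.hadoop.join.DefaultJoinKeyExtractor
    num.reducers=10
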
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinUDF.java
deleted file mode 100644
index c36dfdd..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinUDF.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-
-public interface JoinUDF {
- /**
-   * Initializes the UDF by providing the output schema.
-   * @param outputSchema the Avro schema of the join output
- */
- void init(Schema outputSchema);
-
- /**
-   * @param joinKeyVal common key used to join all the sources
-   * @param joinInput mapping from source name to its GenericRecord(s)
-   * @return the joined output records
- */
- List<GenericRecord> performJoin(Object joinKeyVal, Map<String, List<GenericRecord>> joinInput);
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputKey.java
deleted file mode 100644
index 5d168b5..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputKey.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-public class MapOutputKey {
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputValue.java
deleted file mode 100644
index a7c7783..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputValue.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-
-public class MapOutputValue {
-
- private static BinaryDecoder binaryDecoder;
- private String schemaName;
- private GenericRecord record;
-  private GenericDatumWriter<GenericRecord> writer;
- private EncoderFactory factory = EncoderFactory.get();
-
- private BinaryEncoder binaryEncoder;
-
- public MapOutputValue(String schemaName, GenericRecord record) {
- this.schemaName = schemaName;
- this.record = record;
- }
-
- public String getSchemaName() {
- return schemaName;
- }
-
- public GenericRecord getRecord() {
- return record;
- }
-
- public byte[] toBytes() throws IOException {
- ByteArrayOutputStream dataStream = new ByteArrayOutputStream();
- Schema schema = record.getSchema();
-    if (writer == null) {
-      writer = new GenericDatumWriter<GenericRecord>(schema);
-    }
-    binaryEncoder = factory.directBinaryEncoder(dataStream, binaryEncoder);
-    writer.write(record, binaryEncoder);
-
-    // Serialize to bytes. We also need to know the schema name when we
-    // process this record on the reducer, since the reducer receives records
-    // from multiple mappers. So we first write the schema/source name and
-    // then the serialized bytes.
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(out);
- dos.writeInt(schema.getName().getBytes().length);
- dos.write(schema.getName().getBytes());
- byte[] dataBytes = dataStream.toByteArray();
-
- dos.writeInt(dataBytes.length);
- dos.write(dataBytes);
- return out.toByteArray();
- }
-
- public static MapOutputValue fromBytes(byte[] bytes, Map<String, Schema> schemaMap)
- throws IOException {
- DataInputStream dataInputStream = new DataInputStream(new ByteArrayInputStream(bytes));
- int length = dataInputStream.readInt();
- byte[] sourceNameBytes = new byte[length];
-    dataInputStream.readFully(sourceNameBytes);
- String schemaName = new String(sourceNameBytes);
-
- int recordDataLength = dataInputStream.readInt();
-
- byte[] recordBytes = new byte[recordDataLength];
-    dataInputStream.readFully(recordBytes);
- Schema schema = schemaMap.get(schemaName);
- GenericRecord record = new GenericData.Record(schema);
- binaryDecoder = DecoderFactory.get().binaryDecoder(recordBytes, binaryDecoder);
- GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
- gdr.read(record, binaryDecoder);
- return new MapOutputValue(schemaName, record);
- }
-
-}
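
A hedged round-trip sketch of the serialization format above (schema name length + bytes, then record length + bytes); the schema is hypothetical:

    import java.util.Collections;
    import java.util.Map;

    import org.apache.avro.Schema;
    import org.apache.avro.SchemaBuilder;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;

    public class MapOutputValueRoundTrip {
      public static void main(String[] args) throws Exception {
        // Hypothetical single-field record schema named "clicks".
        Schema schema = SchemaBuilder.record("clicks").fields()
            .requiredString("memberId").endRecord();
        GenericRecord record = new GenericData.Record(schema);
        record.put("memberId", "m123");

        byte[] bytes = new MapOutputValue("clicks", record).toBytes();

        // The reducer-side lookup keys the schema map by schema name.
        Map<String, Schema> schemaMap = Collections.singletonMap("clicks", schema);
        MapOutputValue decoded = MapOutputValue.fromBytes(bytes, schemaMap);
        System.out.println(decoded.getSchemaName());             // clicks
        System.out.println(decoded.getRecord().get("memberId")); // m123
      }
    }
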
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DefaultSegmentPushUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DefaultSegmentPushUDF.java
deleted file mode 100644
index 1cfca7f..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DefaultSegmentPushUDF.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.util.Properties;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultSegmentPushUDF implements SegmentPushUDF {
- private static final Logger LOG = LoggerFactory.getLogger(DefaultSegmentPushUDF.class);
-
- @Override
- public void emitCustomEvents(Properties properties) {
- // do nothing
- LOG.info("Default segment push udf");
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java
deleted file mode 100644
index 21c77af..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.helix.AccessOption;
-import org.apache.helix.BaseDataAccessor;
-import org.apache.helix.HelixDataAccessor;
-import org.apache.helix.PropertyKey;
-import org.apache.helix.PropertyKey.Builder;
-import org.apache.helix.ZNRecord;
-import org.apache.helix.manager.zk.ZKHelixDataAccessor;
-import org.apache.helix.manager.zk.ZNRecordSerializer;
-import org.apache.helix.manager.zk.ZkBaseDataAccessor;
-import org.apache.helix.manager.zk.ZkClient;
-import org.apache.helix.model.ExternalView;
-import org.apache.helix.model.IdealState;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DeleteOverlappingSegmentsInPinot {
-
- private static final Logger LOG = LoggerFactory.getLogger(DeleteOverlappingSegmentsInPinot.class);
-
- public static void main(String[] args) throws Exception {
- String zkUrl = args[0];
- String zkCluster = args[1];
- String tableName = args[2];
- deleteOverlappingSegments(zkUrl, zkCluster, tableName);
- }
-
- private static IdealState computeNewIdealStateAfterDeletingOverlappingSegments(HelixDataAccessor helixDataAccessor, PropertyKey idealStatesKey) {
- IdealState is = helixDataAccessor.getProperty(idealStatesKey);
- // compute existing DAILY segments
- Set<String> daysWithDailySegments = new HashSet<>();
- for (String segmentName : is.getPartitionSet()) {
- LOG.info("Segment Name : {}", segmentName);
- if (segmentName.indexOf("DAILY") > -1) {
- String[] splits = segmentName.split("_");
- String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
- String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
- LOG.info("Start : {} End : {}", startDay, endDay);
- daysWithDailySegments.add(startDay);
- }
- }
- // compute list of HOURLY segments to be deleted
- Set<String> hourlySegmentsToDelete = new TreeSet<>();
- for (String segmentName : is.getPartitionSet()) {
- LOG.info("Segment name {}", segmentName);
- if (segmentName.indexOf("HOURLY") > -1) {
- String[] splits = segmentName.split("_");
- String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
- String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
- LOG.info("Start : {} End : {}", startDay, endDay);
- if (daysWithDailySegments.contains(startDay)) {
- hourlySegmentsToDelete.add(segmentName);
- }
- }
- }
- LOG.info("HOURLY segments that can be deleted: {}", hourlySegmentsToDelete.size());
- LOG.info("Hourly segments to delete {}", hourlySegmentsToDelete.toString().replaceAll(",", "\n"));
- IdealState newIdealState = new IdealState(is.getRecord());
- for (String hourlySegmentToDelete : hourlySegmentsToDelete) {
- newIdealState.getRecord().getMapFields().remove(hourlySegmentToDelete);
- }
- return newIdealState;
- }
-
- public static boolean deleteOverlappingSegments(String zkUrl, String zkCluster, String tableName) {
- boolean updateSuccessful = false;
-
- if (!tableName.endsWith("_OFFLINE")) {
- tableName = tableName + "_OFFLINE";
- }
-
- ZkClient zkClient = new ZkClient(zkUrl);
- ZNRecordSerializer zkSerializer = new ZNRecordSerializer();
- zkClient.setZkSerializer(zkSerializer);
- BaseDataAccessor<ZNRecord> baseDataAccessor = new ZkBaseDataAccessor<>(zkClient);
- HelixDataAccessor helixDataAccessor = new ZKHelixDataAccessor(zkCluster, baseDataAccessor);
- Builder keyBuilder = helixDataAccessor.keyBuilder();
- PropertyKey idealStateKey = keyBuilder.idealStates(tableName);
- PropertyKey externalViewKey = keyBuilder.externalView(tableName);
- IdealState currentIdealState = helixDataAccessor.getProperty(idealStateKey);
- byte[] serializeIS = zkSerializer.serialize(currentIdealState.getRecord());
- String name = tableName + ".idealstate." + System.currentTimeMillis();
- File outputFile = new File("/tmp", name);
-
- try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
- IOUtils.write(serializeIS, fileOutputStream);
- } catch (IOException e) {
- LOG.error("Exception in delete overlapping segments", e);
- return updateSuccessful;
- }
- LOG.info("Saved current idealstate to {}", outputFile);
- IdealState newIdealState;
- do {
- newIdealState = computeNewIdealStateAfterDeletingOverlappingSegments(helixDataAccessor, idealStateKey);
- LOG.info("Updating IdealState");
- updateSuccessful = helixDataAccessor.getBaseDataAccessor().set(idealStateKey.getPath(), newIdealState.getRecord(), newIdealState.getRecord().getVersion(), AccessOption.PERSISTENT);
- if (updateSuccessful) {
- int numSegmentsDeleted = currentIdealState.getPartitionSet().size() - newIdealState.getPartitionSet().size();
- LOG.info("Successfully updated IdealState: Removed segments: {}", (numSegmentsDeleted));
- }
- } while (!updateSuccessful);
-
- try {
- while (true) {
- Thread.sleep(10000);
- ExternalView externalView = helixDataAccessor.getProperty(externalViewKey);
- IdealState idealState = helixDataAccessor.getProperty(idealStateKey);
- Set<String> evPartitionSet = externalView.getPartitionSet();
- Set<String> isPartitionSet = idealState.getPartitionSet();
- if (evPartitionSet.equals(isPartitionSet)) {
- LOG.info("Table {} has reached stable state. i.e segments in external view match idealstates", tableName);
- break;
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- return updateSuccessful;
- }
-}
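
The DAILY/HOURLY day extraction above relies on the segment naming convention. A hedged sketch of that parsing with a hypothetical segment name of the form table_schedule_start_end_index:

    public class SegmentDayParsingSketch {
      public static void main(String[] args) {
        String segmentName = "myTable_HOURLY_2018-01-01-00_2018-01-01-01_5";
        String[] splits = segmentName.split("_");
        // Same offsets as above: the second-to-last token is the end time,
        // the third-to-last the start time; keep only the yyyy-mm-dd day prefix.
        String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
        String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
        System.out.println(startDay + " -> " + endDay); // 2018-01-01 -> 2018-01-01
      }
    }
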
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIs.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIs.java
deleted file mode 100644
index 264c369..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIs.java
+++ /dev/null
@@ -1,225 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.methods.HttpDelete;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.util.EntityUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Joiner;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * Contains APIs which can be used for segment operations
- * such as listing, deleting overlap
- */
-public class SegmentPushControllerAPIs {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentPushControllerAPIs.class);
- private String[] controllerHosts;
- private int controllerPort;
- private HttpHost controllerHttpHost;
-
- private static final String OFFLINE_SEGMENTS = "OFFLINE";
-  private static final String DAILY_SCHEDULE = "DAILY";
-  private static final String HOURLY_SCHEDULE = "HOURLY";
-  private static final String SEGMENTS_ENDPOINT = "/segments/";
-  private static final String TABLES_ENDPOINT = "/tables/";
-  private static final String TYPE_PARAMETER = "?type=offline";
-  private static final String UTF_8 = "UTF-8";
-  private static final long TIMEOUT = 120000;
-  private static final String DATE_JOINER = "-";
-
- SegmentPushControllerAPIs(String[] controllerHosts, String controllerPort) {
- this.controllerHosts = controllerHosts;
- this.controllerPort = Integer.valueOf(controllerPort);
- }
-
- public void deleteOverlappingSegments(String tableName, String segmentName) throws IOException {
- if (segmentName.contains(DAILY_SCHEDULE)) {
- for (String controllerHost : controllerHosts) {
- controllerHttpHost = new HttpHost(controllerHost, controllerPort);
-
- LOGGER.info("Getting overlapped segments for {}*************", segmentName);
- List<String> overlappingSegments = getOverlappingSegments(tableName, segmentName);
-
- if (overlappingSegments.isEmpty()) {
- LOGGER.info("No overlapping segments found");
- } else {
- LOGGER.info("Deleting overlapped segments****************");
- deleteOverlappingSegments(tableName, overlappingSegments);
- }
- }
- } else {
- LOGGER.info("No overlapping segments to delete for HOURLY");
- }
- }
-
- private List<String> getOverlappingSegments(String tablename, String segmentName) throws IOException {
-
- List<String> overlappingSegments = new ArrayList<>();
- String pattern = getOverlapPattern(segmentName, tablename);
- if (pattern != null) {
- LOGGER.info("Finding segments overlapping to {} with pattern {}", segmentName, pattern);
- List<String> allSegments = getAllSegments(tablename, segmentName);
- overlappingSegments = getOverlappingSegments(allSegments, pattern);
- }
- return overlappingSegments;
- }
-
- public List<String> getOverlappingSegments(List<String> allSegments, String pattern) {
- List<String> overlappingSegments = new ArrayList<>();
- for (String segment : allSegments) {
- if (segment.startsWith(pattern)) {
- LOGGER.info("Found overlapping segment {}", segment);
- overlappingSegments.add(segment);
- }
- }
- return overlappingSegments;
- }
-
- public String getOverlapPattern(String segmentName, String tablename) {
- String pattern = null;
- // segment name format: table[_*]Name_schedule_startDate_endDate
- String[] tokens = segmentName.split(ThirdEyeConstants.SEGMENT_JOINER);
- int size = tokens.length;
- if (size > 3) {
- String startDateToken = tokens[size - 2];
- if (startDateToken.lastIndexOf(DATE_JOINER) != -1) {
- String datePrefix = startDateToken.substring(0, startDateToken.lastIndexOf(DATE_JOINER));
- pattern = Joiner.on(ThirdEyeConstants.SEGMENT_JOINER).join(tablename, HOURLY_SCHEDULE, datePrefix);
- }
- }
- return pattern;
- }
-
- private List<String> getAllSegments(String tablename, String segmentName) throws IOException {
- List<String> allSegments = new ArrayList<>();
-
- HttpClient controllerClient = new DefaultHttpClient();
- HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tablename, UTF_8));
- HttpResponse res = controllerClient.execute(controllerHttpHost, req);
- try {
- if (res.getStatusLine().getStatusCode() != 200) {
- throw new IllegalStateException(res.getStatusLine().toString());
- }
- InputStream content = res.getEntity().getContent();
- JsonNode segmentsData = new ObjectMapper().readTree(content);
-
- if (segmentsData != null) {
- JsonNode offlineSegments = segmentsData.get(0).get(OFFLINE_SEGMENTS);
- if (offlineSegments != null) {
- for (JsonNode segment : offlineSegments) {
- allSegments.add(segment.asText());
- }
- }
- }
- LOGGER.info("All segments : {}", allSegments);
- } finally {
- if (res.getEntity() != null) {
- EntityUtils.consume(res.getEntity());
- }
- }
- return allSegments;
- }
-
- private boolean isDeleteSuccessful(String tablename, String segmentName) throws IOException {
-
- boolean deleteSuccessful = false;
- HttpClient controllerClient = new DefaultHttpClient();
-    // this endpoint reads segment names from the table's ideal state
- HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tablename, UTF_8) + SEGMENTS_ENDPOINT);
- HttpResponse res = controllerClient.execute(controllerHttpHost, req);
- try {
- if (res.getStatusLine().getStatusCode() != 200) {
- throw new IllegalStateException(res.getStatusLine().toString());
- }
- InputStream content = res.getEntity().getContent();
- String response = IOUtils.toString(content);
- LOGGER.info("All segments from ideal state {}", response);
- String decoratedSegmentName = "\\\""+segmentName+"\\\"";
- LOGGER.info("Decorated segment name {}", decoratedSegmentName);
- if (!response.contains(decoratedSegmentName)) {
- deleteSuccessful = true;
- LOGGER.info("Delete successful");
- } else {
- LOGGER.info("Delete failed");
- }
- } finally {
- if (res.getEntity() != null) {
- EntityUtils.consume(res.getEntity());
- }
-
- }
- return deleteSuccessful;
-
- }
-
-
- private void deleteOverlappingSegments(String tablename, List<String> overlappingSegments) throws IOException {
-
- for (String segment : overlappingSegments) {
- boolean deleteSuccessful = false;
- long elapsedTime = 0;
- long startTimeMillis = System.currentTimeMillis();
- while (elapsedTime < TIMEOUT && !deleteSuccessful) {
- deleteSuccessful = deleteSegment(tablename, segment);
- LOGGER.info("Response {} while deleting segment {} from table {}", deleteSuccessful, segment, tablename);
- long currentTimeMillis = System.currentTimeMillis();
-        elapsedTime = currentTimeMillis - startTimeMillis;
- }
- }
- }
-
- private boolean deleteSegment(String tablename, String segmentName) throws IOException {
- boolean deleteSuccessful = false;
-
- HttpClient controllerClient = new DefaultHttpClient();
- HttpDelete req = new HttpDelete(SEGMENTS_ENDPOINT + URLEncoder.encode(tablename, UTF_8) + "/"
- + URLEncoder.encode(segmentName, UTF_8)
- + TYPE_PARAMETER);
- HttpResponse res = controllerClient.execute(controllerHttpHost, req);
- try {
- if (res == null || res.getStatusLine() == null || res.getStatusLine().getStatusCode() != 200
- || !isDeleteSuccessful(tablename, segmentName)) {
-        LOGGER.info("Failed to delete segment, retrying. Response: {}", res);
- } else {
- deleteSuccessful = true;
- }
- } finally {
- if (res.getEntity() != null) {
- EntityUtils.consume(res.getEntity());
- }
- }
- return deleteSuccessful;
- }
-
-}
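
A hedged walk-through of getOverlapPattern, assuming ThirdEyeConstants.SEGMENT_JOINER is "_" (its definition is not shown here) and a start-date token carrying an hour suffix; all names are hypothetical:

    public class OverlapPatternSketch {
      public static void main(String[] args) {
        // Segment name with the trailing shard index already stripped off,
        // as SegmentPushPhase does before calling deleteOverlappingSegments.
        String segmentName = "myTable_DAILY_2018-01-01-00_2018-01-02-00";
        String tablename = "myTable";
        String[] tokens = segmentName.split("_");           // assumes SEGMENT_JOINER == "_"
        String startDateToken = tokens[tokens.length - 2];  // "2018-01-01-00"
        String datePrefix =
            startDateToken.substring(0, startDateToken.lastIndexOf("-"));
        String pattern = tablename + "_HOURLY_" + datePrefix;
        // HOURLY segments starting with this pattern are considered overlapping.
        System.out.println(pattern); // myTable_HOURLY_2018-01-01
      }
    }
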
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhase.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhase.java
deleted file mode 100644
index 0d01a48..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhase.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.push;
-
-import static com.linkedin.thirdeye.hadoop.push.SegmentPushPhaseConstants.*;
-
-import com.linkedin.pinot.common.utils.SimpleHttpResponse;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.pinot.common.utils.FileUploadDownloadClient;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * This class pushes Pinot segments generated by SegmentCreation
- * onto the Pinot cluster
- */
-public class SegmentPushPhase extends Configured {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(SegmentPushPhase.class);
- private final String name;
- private final Properties props;
- private String[] hosts;
- private String port;
- private String tablename;
- private boolean uploadSuccess = true;
- private String segmentName = null;
- private String segmentPushUDFClass;
- SegmentPushControllerAPIs segmentPushControllerAPIs;
-
-
- public SegmentPushPhase(String jobName, Properties properties) throws Exception {
- super(new Configuration());
- name = jobName;
- props = properties;
- }
-
- public void run() throws Exception {
- Configuration configuration = new Configuration();
- FileSystem fs = FileSystem.get(configuration);
-
- long startTime = System.currentTimeMillis();
-
- String segmentPath = getAndSetConfiguration(configuration, SEGMENT_PUSH_INPUT_PATH);
- LOGGER.info("Segment path : {}", segmentPath);
- hosts = getAndSetConfiguration(configuration, SEGMENT_PUSH_CONTROLLER_HOSTS).split(ThirdEyeConstants.FIELD_SEPARATOR);
- port = getAndSetConfiguration(configuration, SEGMENT_PUSH_CONTROLLER_PORT);
- tablename = getAndCheck(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
- segmentPushUDFClass = props.getProperty(SEGMENT_PUSH_UDF_CLASS.toString(), DefaultSegmentPushUDF.class.getCanonicalName());
-
- Path path = new Path(segmentPath);
- FileStatus[] fileStatusArr = fs.globStatus(path);
- for (FileStatus fileStatus : fileStatusArr) {
- if (fileStatus.isDirectory()) {
- pushDir(fs, fileStatus.getPath());
- } else {
- pushOneTarFile(fs, fileStatus.getPath());
- }
- }
- long endTime = System.currentTimeMillis();
-
- if (uploadSuccess && segmentName != null) {
- props.setProperty(SEGMENT_PUSH_START_TIME.toString(), String.valueOf(startTime));
- props.setProperty(SEGMENT_PUSH_END_TIME.toString(), String.valueOf(endTime));
-
- segmentPushControllerAPIs = new SegmentPushControllerAPIs(hosts, port);
- LOGGER.info("Deleting segments overlapping to {} from table {} ", segmentName, tablename);
- segmentPushControllerAPIs.deleteOverlappingSegments(tablename, segmentName);
-
- try {
- LOGGER.info("Initializing SegmentPushUDFClass:{}", segmentPushUDFClass);
- Constructor<?> constructor = Class.forName(segmentPushUDFClass).getConstructor();
- SegmentPushUDF segmentPushUDF = (SegmentPushUDF) constructor.newInstance();
- segmentPushUDF.emitCustomEvents(props);
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- }
-
- public void pushDir(FileSystem fs, Path path) throws Exception {
- LOGGER.info("******** Now uploading segments tar from dir: {}", path);
- FileStatus[] fileStatusArr = fs.listStatus(new Path(path.toString() + "/"));
- for (FileStatus fileStatus : fileStatusArr) {
- if (fileStatus.isDirectory()) {
- pushDir(fs, fileStatus.getPath());
- } else {
- pushOneTarFile(fs, fileStatus.getPath());
- }
- }
- }
-
- public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
- String fileName = path.getName();
- if (!fileName.endsWith(".tar.gz")) {
- return;
- }
- long length = fs.getFileStatus(path).getLen();
- try (FileUploadDownloadClient fileUploadDownloadClient = new FileUploadDownloadClient()) {
- for (String host : hosts) {
- try (InputStream inputStream = fs.open(path)) {
-          fileName = fileName.substring(0, fileName.length() - ".tar.gz".length());
- if (fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER) != -1) {
- segmentName = fileName.substring(0, fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER));
- }
- LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, port);
- SimpleHttpResponse simpleHttpResponse = fileUploadDownloadClient.uploadSegment(
- FileUploadDownloadClient.getUploadSegmentHttpURI(host, Integer.parseInt(port)), fileName, inputStream);
- int responseCode = simpleHttpResponse.getStatusCode();
- LOGGER.info("Response code: {}", responseCode);
- if (responseCode != 200) {
- uploadSuccess = false;
- }
- } catch (Exception e) {
- LOGGER.error("******** Error Uploading file: {} to Host: {} and Port: {} *******", fileName, host, port);
- LOGGER.error("Caught exception during upload", e);
-        throw new RuntimeException("Error while sending tar files to the push hosts!");
- }
- }
- }
- }
-
-
- private String getAndSetConfiguration(Configuration configuration,
- SegmentPushPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
-      throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 1) {
- throw new IllegalArgumentException("usage: config.properties");
- }
-
- Properties props = new Properties();
- props.load(new FileInputStream(args[0]));
-
- SegmentPushPhase job = new SegmentPushPhase("segment_push_job", props);
- job.run();
- }
-
-
-}
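
A hedged example of the properties this push phase reads; host, port, and path are hypothetical, and the table name key comes from ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME (its string value is defined elsewhere and not shown here):

    segment.push.input.path=/output/segments/myTable/*
    segment.push.controller.hosts=controller1.example.com
    segment.push.controller.port=9000
    # optional; defaults to DefaultSegmentPushUDF
    segment.push.udf.class=com.linkedin.thirdeye.hadoop.push.DefaultSegmentPushUDF
    # plus the table name property from ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME
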
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhaseConstants.java
deleted file mode 100644
index 421c026..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhaseConstants.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.push;
-
-/**
- * Class containing properties to be set for segment push
- */
-public enum SegmentPushPhaseConstants {
-
- SEGMENT_PUSH_INPUT_PATH("segment.push.input.path"),
- SEGMENT_PUSH_UDF_CLASS("segment.push.udf.class"),
- SEGMENT_PUSH_CONTROLLER_HOSTS("segment.push.controller.hosts"),
- SEGMENT_PUSH_CONTROLLER_PORT("segment.push.controller.port"),
- SEGMENT_PUSH_START_TIME("segment.push.start.time"),
- SEGMENT_PUSH_END_TIME("segment.push.end.time");
-
- String name;
-
- SegmentPushPhaseConstants(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
\ No newline at end of file
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushUDF.java
deleted file mode 100644
index a9e9039..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushUDF.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.util.Properties;
-
-public interface SegmentPushUDF {
-
- void emitCustomEvents(Properties properties);
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java
deleted file mode 100644
index fe3d171..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.segment.creation;
-
-/**
- * Properties to be set for the segment creation phase
- */
-public enum SegmentCreationPhaseConstants {
-
- SEGMENT_CREATION_INPUT_PATH("segment.creation.input.path"),
- SEGMENT_CREATION_OUTPUT_PATH("segment.creation.output.path"),
- SEGMENT_CREATION_THIRDEYE_CONFIG("segment.creation.thirdeye.config"),
- SEGMENT_CREATION_WALLCLOCK_START_TIME("segment.creation.wallclock.start.time"),
- SEGMENT_CREATION_WALLCLOCK_END_TIME("segment.creation.wallclock.end.time"),
- SEGMENT_CREATION_SCHEDULE("segment.creation.schedule"),
- SEGMENT_CREATION_BACKFILL("segment.creation.backfill");
-
- String name;
-
- SegmentCreationPhaseConstants(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
\ No newline at end of file
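For illustration, a properties file driving the segment creation job (next file) might set these keys as follows. Values are hypothetical; segment.creation.thirdeye.config is serialized into the job configuration by the job itself, and the ThirdEye dataset properties consumed by ThirdEyeConfig.fromProperties are omitted here:

    segment.creation.input.path=/thirdeye/transformation/output
    segment.creation.output.path=/thirdeye/segments/output
    segment.creation.wallclock.start.time=1514764800000
    segment.creation.wallclock.end.time=1514851200000
    segment.creation.schedule=DAILY
    segment.creation.backfill=false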
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java
deleted file mode 100644
index 2446e7f..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.segment.creation;
-
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_INPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_OUTPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_SCHEDULE;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_THIRDEYE_CONFIG;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_END_TIME;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_START_TIME;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_BACKFILL;
-
-import java.io.FileInputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobContext;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-/**
- * This class contains the job that generates pinot segments with star tree index
- */
-public class SegmentCreationPhaseJob extends Configured {
-
- private static final String TEMP = "temp";
- private static final String DEFAULT_BACKFILL = "false";
-
- private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationPhaseJob.class);
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
- private final String name;
- private final Properties props;
-
-
- public SegmentCreationPhaseJob(String jobName, Properties properties) throws Exception {
- super(new Configuration());
- getConf().set("mapreduce.job.user.classpath.first", "true");
- name = jobName;
- props = properties;
-
- }
-
- public Job run() throws Exception {
-
- Job job = Job.getInstance(getConf());
-
- job.setJarByClass(SegmentCreationPhaseJob.class);
- job.setJobName(name);
-
- FileSystem fs = FileSystem.get(getConf());
-
- Configuration configuration = job.getConfiguration();
-
- String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
- LOGGER.info("Input path : {}", inputSegmentDir);
- Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
- LOGGER.info("Schema : {}", avroSchema);
- String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
- String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
- ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
- LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
- String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
- LOGGER.info("Output path : {}", outputDir);
- Path stagingDir = new Path(outputDir, TEMP);
- LOGGER.info("Staging dir : {}", stagingDir);
- String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
- LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
- String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
- LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
- String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
- LOGGER.info("Segment schedule : {}", schedule);
- String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
- configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
- LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));
-
- // Create temporary directory
- if (fs.exists(stagingDir)) {
- LOGGER.warn("Found the temp folder, deleting it");
- fs.delete(stagingDir, true);
- }
- fs.mkdirs(stagingDir);
- fs.mkdirs(new Path(stagingDir + "/input/"));
-
- // Create output directory
- if (fs.exists(new Path(outputDir))) {
- LOGGER.warn("Found the output folder deleting it");
- fs.delete(new Path(outputDir), true);
- }
- fs.mkdirs(new Path(outputDir));
-
- // Read input files
- List<FileStatus> inputDataFiles = new ArrayList<>();
- for (String input : inputSegmentDir.split(",")) {
- Path inputPathPattern = new Path(input);
- inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
- }
- LOGGER.info("size {}", inputDataFiles.size());
-
- try {
- for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
- FileStatus file = inputDataFiles.get(seqId);
- String completeFilePath = " " + file.getPath().toString() + " " + seqId; // leading space keeps the path at index 1 when the mapper splits on " "
- Path newOutPutFile = new Path((stagingDir + "/input/" + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
- FSDataOutputStream stream = fs.create(newOutPutFile);
- LOGGER.info("wrote {}", completeFilePath);
- stream.writeUTF(completeFilePath);
- stream.flush();
- stream.close();
- }
- } catch (Exception e) {
- LOGGER.error("Exception while reading input files ", e);
- }
-
- job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);
-
- if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
- job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
- }
-
- job.setInputFormatClass(TextInputFormat.class);
- job.setOutputFormatClass(TextOutputFormat.class);
-
- job.setMapOutputKeyClass(LongWritable.class);
- job.setMapOutputValueClass(Text.class);
-
- FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
- FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));
-
- job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
- job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
- job.setMaxReduceAttempts(1);
- job.setMaxMapAttempts(0);
- job.setNumReduceTasks(0);
- for (Object key : props.keySet()) {
- job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
- }
-
- job.waitForCompletion(true);
- if (!job.isSuccessful()) {
- throw new RuntimeException("Job failed : " + job);
- }
-
- LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
- FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
- for (FileStatus segment : segmentArr) {
- fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
- }
-
- // Delete temporary directory.
- LOGGER.info("Cleanup the working directory.");
- LOGGER.info("Deleting the dir: {}", stagingDir);
- fs.delete(stagingDir, true);
-
- return job;
- }
-
- private String getAndSetConfiguration(Configuration configuration,
- SegmentCreationPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
- throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
- public static void main(String[] args) throws Exception {
- if (args.length != 1) {
- throw new IllegalArgumentException("usage: config.properties");
- }
-
- Properties props = new Properties();
- props.load(new FileInputStream(args[0]));
- SegmentCreationPhaseJob job = new SegmentCreationPhaseJob("segment_creation_job", props);
- job.run();
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java
deleted file mode 100644
index c9e8f85..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java
+++ /dev/null
@@ -1,320 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.segment.creation;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
-import com.google.common.base.Joiner;
-import com.linkedin.pinot.common.data.FieldSpec;
-import com.linkedin.pinot.common.data.Schema;
-import com.linkedin.pinot.common.data.StarTreeIndexSpec;
-import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
-import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
-import com.linkedin.pinot.core.data.readers.FileFormat;
-import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
-import com.linkedin.pinot.core.segment.creator.StatsCollectorConfig;
-import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
-import com.linkedin.pinot.core.segment.creator.impl.stats.LongColumnPreIndexStatsCollector;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import org.apache.avro.file.DataFileStream;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.joda.time.DateTime;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.linkedin.pinot.core.segment.creator.impl.V1Constants.MetadataKeys.Segment.*;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.*;
-
-/**
- * Mapper class for SegmentCreation job, which sets configs required for
- * segment generation with star tree index
- */
-public class SegmentCreationPhaseMapReduceJob {
-
- public static class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
- private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationPhaseMapReduceJob.class);
- private static ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());
-
- private Configuration properties;
-
- private String inputFilePath;
- private String outputPath;
- private String tableName;
-
- private Path currentHdfsWorkDir;
- private String currentDiskWorkDir;
-
- // Temporary HDFS path for local machine
- private String localHdfsSegmentTarPath;
-
- private String localDiskSegmentDirectory;
- private String localDiskSegmentTarPath;
-
- private ThirdEyeConfig thirdeyeConfig;
- private Schema schema;
-
- private Long segmentWallClockStartTime;
- private Long segmentWallClockEndTime;
- private String segmentSchedule;
- private boolean isBackfill;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
-
- currentHdfsWorkDir = FileOutputFormat.getWorkOutputPath(context);
- currentDiskWorkDir = "pinot_hadoop_tmp";
-
- // Temporary HDFS path for local machine
- localHdfsSegmentTarPath = currentHdfsWorkDir + "/segmentTar";
-
- // Temporary DISK path for local machine
- localDiskSegmentDirectory = currentDiskWorkDir + "/segments/";
- localDiskSegmentTarPath = currentDiskWorkDir + "/segmentsTar/";
- new File(localDiskSegmentTarPath).mkdirs();
-
- LOGGER.info("*********************************************************************");
- LOGGER.info("Configurations : {}", context.getConfiguration().toString());
- LOGGER.info("*********************************************************************");
- LOGGER.info("Current HDFS working dir : {}", currentHdfsWorkDir);
- LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
- LOGGER.info("*********************************************************************");
- properties = context.getConfiguration();
-
- outputPath = properties.get(SEGMENT_CREATION_OUTPUT_PATH.toString());
-
- thirdeyeConfig = OBJECT_MAPPER.readValue(properties.get(SEGMENT_CREATION_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- LOGGER.info(thirdeyeConfig.encode());
- schema = ThirdeyePinotSchemaUtils.createSchema(thirdeyeConfig);
- tableName = thirdeyeConfig.getCollection();
-
- segmentWallClockStartTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_START_TIME.toString()));
- segmentWallClockEndTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_END_TIME.toString()));
- segmentSchedule = properties.get(SEGMENT_CREATION_SCHEDULE.toString());
- isBackfill = Boolean.valueOf(properties.get(SEGMENT_CREATION_BACKFILL.toString()));
- }
-
- @Override
- public void cleanup(Context context) throws IOException, InterruptedException {
- FileUtils.deleteQuietly(new File(currentDiskWorkDir));
- }
-
- @Override
- protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-
- String line = value.toString();
- String[] lineSplits = line.split(" ");
-
- LOGGER.info("*********************************************************************");
- LOGGER.info("mapper input : {}", value);
- LOGGER.info("Path to output : {}", outputPath);
- LOGGER.info("Table name : {}", tableName);
- LOGGER.info("num lines : {}", lineSplits.length);
-
- for (String split : lineSplits) {
- LOGGER.info("Command line : {}", split);
- }
- LOGGER.info("*********************************************************************");
-
- if (lineSplits.length != 3) {
- throw new RuntimeException("Input to the mapper is malformed, please contact the pinot team");
- }
- inputFilePath = lineSplits[1].trim();
-
- LOGGER.info("*********************************************************************");
- LOGGER.info("input data file path : {}", inputFilePath);
- LOGGER.info("local hdfs segment tar path: {}", localHdfsSegmentTarPath);
- LOGGER.info("local disk segment path: {}", localDiskSegmentDirectory);
- LOGGER.info("*********************************************************************");
-
- try {
- createSegment(inputFilePath, schema, lineSplits[2]);
- LOGGER.info("finished segment creation job successfully");
- } catch (Exception e) {
- LOGGER.error("Got exceptions during creating segments!", e);
- }
-
- context.write(new LongWritable(Long.parseLong(lineSplits[2])),
- new Text(FileSystem.get(new Configuration()).listStatus(new Path(localHdfsSegmentTarPath + "/"))[0].getPath().getName()));
- LOGGER.info("finished the job successfully");
- }
-
- private String createSegment(String dataFilePath, Schema schema, String seqId) throws Exception {
- final FileSystem fs = FileSystem.get(new Configuration());
- final Path hdfsDataPath = new Path(dataFilePath);
- final File dataPath = new File(currentDiskWorkDir, "data");
- if (dataPath.exists()) {
- FileUtils.deleteQuietly(dataPath); // File.delete() would fail silently on a non-empty directory
- }
- dataPath.mkdir();
- final Path localFilePath = new Path(dataPath + "/" + hdfsDataPath.getName());
- fs.copyToLocalFile(hdfsDataPath, localFilePath);
-
- LOGGER.info("Data schema is : {}", schema);
-
- // Set segment generator config
- LOGGER.info("*********************************************************************");
- SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
- segmentGeneratorConfig.setTableName(tableName);
- segmentGeneratorConfig.setInputFilePath(new File(dataPath, hdfsDataPath.getName()).getAbsolutePath());
- LOGGER.info("Setting input path {}", segmentGeneratorConfig.getInputFilePath());
- segmentGeneratorConfig.setFormat(FileFormat.AVRO);
- segmentGeneratorConfig.setSegmentNamePostfix(seqId);
- segmentGeneratorConfig.setOutDir(localDiskSegmentDirectory);
- LOGGER.info("Setting enableStarTreeIndex");
- String minTime = ThirdEyeConstants.DATE_TIME_FORMATTER.print(segmentWallClockStartTime);
- String maxTime = ThirdEyeConstants.DATE_TIME_FORMATTER.print(segmentWallClockEndTime);
- LOGGER.info("Wall clock time : min {} max {}", minTime, maxTime);
- LOGGER.info("isBackfill : {}", isBackfill);
- if (isBackfill) {
- // In case of backfill, we have to ensure that the segment name is the same as the original segment name.
- // The segment name is retained through the backfill and derived_column_transformation phases
- // in the output files generated:
- // backfill will generate original_segment_name.avro
- // derived_column_transformation will generate original_segment_name-m-00000.avro etc.
- String segmentName = hdfsDataPath.getName().split("-(m|r)-[0-9]{5}")[0];
- segmentName = segmentName.split(ThirdEyeConstants.AVRO_SUFFIX)[0];
- segmentGeneratorConfig.setSegmentName(segmentName);
- } else {
- String segmentName =
- Joiner.on(ThirdEyeConstants.SEGMENT_JOINER).join(tableName, segmentSchedule, minTime, maxTime, seqId);
- segmentGeneratorConfig.setSegmentName(segmentName);
- }
- LOGGER.info("Setting segment name {}", segmentGeneratorConfig.getSegmentName());
-
-
- // Set star tree config
- StarTreeIndexSpec starTreeIndexSpec = new StarTreeIndexSpec();
-
- // _raw dimensions should not be in star tree split order
- // if a dimension has a _topk column, we will include only
- // the column with topk, and skip _raw column for materialization in star tree
- Set<String> skipMaterializationForDimensions = new HashSet<>();
- Set<String> transformDimensionsSet = thirdeyeConfig.getTransformDimensions();
- LOGGER.info("Dimensions with _topk column {}", transformDimensionsSet);
- for (String topkTransformDimension : transformDimensionsSet) {
- skipMaterializationForDimensions.add(topkTransformDimension);
- LOGGER.info("Adding {} to skipMaterialization set", topkTransformDimension);
- }
- starTreeIndexSpec.setSkipMaterializationForDimensions(skipMaterializationForDimensions);
- LOGGER.info("Setting skipMaterializationForDimensions {}", skipMaterializationForDimensions);
-
- if (thirdeyeConfig.getSplit() != null) {
- starTreeIndexSpec.setMaxLeafRecords(thirdeyeConfig.getSplit().getThreshold());
- LOGGER.info("Setting split threshold to {}", starTreeIndexSpec.getMaxLeafRecords());
- List<String> splitOrder = thirdeyeConfig.getSplit().getOrder();
- if (splitOrder != null) {
- LOGGER.info("Removing from splitOrder, any dimensions which are also in skipMaterializationForDimensions");
- splitOrder.removeAll(skipMaterializationForDimensions);
- starTreeIndexSpec.setDimensionsSplitOrder(splitOrder);
- }
- LOGGER.info("Setting splitOrder {}", splitOrder);
- }
- segmentGeneratorConfig.enableStarTreeIndex(starTreeIndexSpec);
- LOGGER.info("*********************************************************************");
-
- // Set time for SIMPLE_DATE_FORMAT case
- String sdfPrefix = TimeFormat.SIMPLE_DATE_FORMAT.toString() + ThirdEyeConstants.SDF_SEPARATOR;
- if (thirdeyeConfig.getTime().getTimeFormat().startsWith(sdfPrefix)) {
-
- String pattern = thirdeyeConfig.getTime().getTimeFormat().split(ThirdEyeConstants.SDF_SEPARATOR)[1];
- DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(pattern);
-
- File localAvroFile = new File(dataPath, hdfsDataPath.getName());
- LongColumnPreIndexStatsCollector timeColumnStatisticsCollector =
- getTimeColumnStatsCollector(schema, localAvroFile);
- String startTime = timeColumnStatisticsCollector.getMinValue().toString();
- String endTime = timeColumnStatisticsCollector.getMaxValue().toString();
- startTime = String.valueOf(DateTime.parse(startTime, sdfFormatter).getMillis());
- endTime = String.valueOf(DateTime.parse(endTime, sdfFormatter).getMillis());
-
- // set start time
- segmentGeneratorConfig.getCustomProperties().put(SEGMENT_START_TIME, startTime);
- // set end time
- segmentGeneratorConfig.getCustomProperties().put(SEGMENT_END_TIME, endTime);
- // set time unit
- segmentGeneratorConfig.setSegmentTimeUnit(TimeUnit.MILLISECONDS);
- }
-
- // Generate segment
- SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
- driver.init(segmentGeneratorConfig);
- driver.build();
-
- // Tar the segment directory into file.
- String segmentName = null;
- File localDiskSegmentDirectoryFile = new File(localDiskSegmentDirectory);
- for (File file : localDiskSegmentDirectoryFile.listFiles()) {
- segmentName = file.getName();
- if (segmentName.startsWith(tableName)) {
- break;
- }
- }
- String localSegmentPath = new File(localDiskSegmentDirectory, segmentName).getAbsolutePath();
-
- String localTarPath = localDiskSegmentTarPath + "/" + segmentName + ".tar.gz";
- LOGGER.info("Trying to tar the segment to: {}", localTarPath);
- TarGzCompressionUtils.createTarGzOfDirectory(localSegmentPath, localTarPath);
- String hdfsTarPath = localHdfsSegmentTarPath + "/" + segmentName + ".tar.gz";
-
- LOGGER.info("*********************************************************************");
- LOGGER.info("Copy from : {} to {}", localTarPath, hdfsTarPath);
- LOGGER.info("*********************************************************************");
- fs.copyFromLocalFile(true, true, new Path(localTarPath), new Path(hdfsTarPath));
- return segmentName;
- }
-
- private LongColumnPreIndexStatsCollector getTimeColumnStatsCollector(Schema schema, File localAvroFile)
- throws FileNotFoundException, IOException {
- String timeColumnName = schema.getTimeColumnName();
- FieldSpec spec = schema.getTimeFieldSpec();
- LOGGER.info("Spec for " + timeColumnName + " is " + spec);
- LongColumnPreIndexStatsCollector timeColumnStatisticsCollector = new LongColumnPreIndexStatsCollector(spec.getName(), new StatsCollectorConfig(schema, null));
- LOGGER.info("StatsCollector :" + timeColumnStatisticsCollector);
- DataFileStream<GenericRecord> dataStream =
- new DataFileStream<GenericRecord>(new FileInputStream(localAvroFile), new GenericDatumReader<GenericRecord>());
- while (dataStream.hasNext()) {
- GenericRecord next = dataStream.next();
- timeColumnStatisticsCollector.collect(next.get(timeColumnName));
- }
- dataStream.close();
- timeColumnStatisticsCollector.seal();
-
- return timeColumnStatisticsCollector;
- }
-
- }
-}
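A hypothetical, self-contained illustration of the backfill segment-name recovery performed in createSegment above, assuming ThirdEyeConstants.AVRO_SUFFIX is the literal ".avro":

    public class BackfillNameRecoveryExample {
      public static void main(String[] args) {
        // derived_column_transformation output: original segment name plus Hadoop task suffix
        String fileName = "myTable_DAILY_1514764800000_1514851200000-m-00000.avro";
        // drop the task suffix ("-m-00000" / "-r-00000") and everything after it
        String segmentName = fileName.split("-(m|r)-[0-9]{5}")[0];
        // drop a trailing ".avro" for plain backfill outputs named "<segment>.avro" (a no-op here)
        segmentName = segmentName.split(".avro")[0];
        System.out.println(segmentName); // myTable_DAILY_1514764800000_1514851200000
      }
    }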
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPair.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPair.java
deleted file mode 100644
index 434b71a..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPair.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-/**
- * Class to manage (dimension value, metric value) pairs.
- * The order of elements is based on the metric value:
- * a lesser metric value is treated as greater in ordering,
- * so that it is evicted first from the fixed-size PriorityQueue.
- */
-public class DimensionValueMetricPair implements Comparable<DimensionValueMetricPair> {
-
- private Object dimensionValue;
- private Number metricValue;
-
- public DimensionValueMetricPair(Object dimensionValue, Number metricValue) {
- this.dimensionValue = dimensionValue;
- this.metricValue = metricValue;
- }
-
- public Object getDimensionValue() {
- return dimensionValue;
- }
- public void setDimensionValue(Object dimensionValue) {
- this.dimensionValue = dimensionValue;
- }
- public Number getMetricValue() {
- return metricValue;
- }
- public void setMetricValue(Number metricValue) {
- this.metricValue = metricValue;
- }
-
-
- @Override
- public int compareTo(DimensionValueMetricPair other) {
- return Double.compare(other.metricValue.doubleValue(), this.metricValue.doubleValue()); // reversed and overflow-safe
- }
-
- @Override
- public String toString() {
- return "[" + dimensionValue + "=" + metricValue + "]";
- }
-
-
-
-}
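A hypothetical usage sketch showing the effect of the reversed comparator together with the size-bounded queue used by the top-k reducer; Guava's MinMaxPriorityQueue evicts its greatest element when full, which under this ordering is the pair with the smallest metric value:

    import com.google.common.collect.MinMaxPriorityQueue;

    public class TopKQueueExample {
      public static void main(String[] args) {
        MinMaxPriorityQueue<DimensionValueMetricPair> queue =
            MinMaxPriorityQueue.maximumSize(2).create();
        queue.add(new DimensionValueMetricPair("us", 100));
        queue.add(new DimensionValueMetricPair("in", 50));
        queue.add(new DimensionValueMetricPair("gb", 10));
        // queue now holds [us=100] and [in=50]; [gb=10] was dropped first
        System.out.println(queue);
      }
    }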
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKDimensionValues.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKDimensionValues.java
deleted file mode 100644
index 8286d70..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKDimensionValues.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-/**
- * Class to create and manage top k values for every dimension
- */
-public class TopKDimensionValues {
- private Map<String, Set<String>> topKDimensions;
-
- public TopKDimensionValues() {
- topKDimensions = new HashMap<>();
- }
-
- public Map<String, Set<String>> getTopKDimensions() {
- return topKDimensions;
- }
-
- public void setTopKDimensions(Map<String, Set<String>> topKDimensions) {
- this.topKDimensions = topKDimensions;
- }
-
- /**
- * Add a top k value for a dimension
- * @param dimension
- * @param value
- */
- public void addValue(String dimension, String value) {
- if (topKDimensions.get(dimension) == null) {
- topKDimensions.put(dimension, new HashSet<String>());
- }
- topKDimensions.get(dimension).add(value);
- }
-
- public void addAllValues(String dimension, Set<String> values) {
- if (topKDimensions.get(dimension) == null) {
- topKDimensions.put(dimension, new HashSet<String>());
- }
- topKDimensions.get(dimension).addAll(values);
- }
-
- /**
- * Add all top k values for all dimensions from a TopKDimensionValues object
- * @param valuesFile
- */
- public void addMap(TopKDimensionValues valuesFile) {
- Map<String, Set<String>> values = valuesFile.getTopKDimensions();
- for (Entry<String, Set<String>> entry : values.entrySet()) {
- if (topKDimensions.get(entry.getKey()) == null) {
- topKDimensions.put(entry.getKey(), new HashSet<String>());
- }
- topKDimensions.get(entry.getKey()).addAll(entry.getValue());
- }
- }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConfig.java
deleted file mode 100644
index 64a2c25..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConfig.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * This class contains the config needed by TopKPhase
- * and the methods to obtain the config from the ThirdEyeConfig
- */
-public class TopKPhaseConfig {
- private List<String> dimensionNames;
- private List<DimensionType> dimensionTypes;
- private List<String> metricNames;
- private List<MetricType> metricTypes;
- private Map<String, Double> metricThresholds;
- private Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec;
- private Map<String, List<String>> whitelist;
-
- private static final double DEFAULT_METRIC_THRESHOLD = 0.01;
-
- public TopKPhaseConfig() {
-
- }
-
- /**
- * @param dimensionNames
- * @param dimensionTypes
- * @param metricNames
- * @param metricTypes
- * @param metricThresholds
- * @param whitelist
- */
- public TopKPhaseConfig(List<String> dimensionNames, List<DimensionType> dimensionTypes,
- List<String> metricNames, List<MetricType> metricTypes,
- Map<String, Double> metricThresholds, Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec,
- Map<String, List<String>> whitelist) {
- super();
- this.dimensionNames = dimensionNames;
- this.dimensionTypes = dimensionTypes;
- this.metricNames = metricNames;
- this.metricTypes = metricTypes;
- this.metricThresholds = metricThresholds;
- this.topKDimensionToMetricsSpec = topKDimensionToMetricsSpec;
- this.whitelist = whitelist;
- }
-
- public List<String> getDimensionNames() {
- return dimensionNames;
- }
-
- public List<DimensionType> getDimensionTypes() {
- return dimensionTypes;
- }
-
- public List<String> getMetricNames() {
- return metricNames;
- }
-
- public List<MetricType> getMetricTypes() {
- return metricTypes;
- }
-
- public Map<String, Double> getMetricThresholds() {
- return metricThresholds;
- }
-
- public Map<String, TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpec() {
- return topKDimensionToMetricsSpec;
- }
-
- public Map<String, List<String>> getWhitelist() {
- return whitelist;
- }
-
- /**
- * This method generates necessary top k config for TopKPhase job from
- * ThirdEye config
- * @param config
- * @return
- */
- public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
-
- //metrics
- List<String> metricNames = new ArrayList<>(config.getMetrics().size());
- List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
- for (MetricSpec spec : config.getMetrics()) {
- metricNames.add(spec.getName());
- metricTypes.add(spec.getType());
- }
-
- // dimensions
- List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
- List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
- for (DimensionSpec spec : config.getDimensions()) {
- dimensionNames.add(spec.getName());
- dimensionTypes.add(spec.getDimensionType());
- }
-
- TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
- Map<String, Double> metricThresholds = new HashMap<>();
- Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
- Map<String, List<String>> whitelist = new HashMap<>();
-
- // topk
- if (topKWhitelist != null) {
- // metric thresholds
- if (topKWhitelist.getThreshold() != null) {
- metricThresholds = topKWhitelist.getThreshold();
- }
- for (String metric : metricNames) {
- if (metricThresholds.get(metric) == null) {
- metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
- }
- }
-
- // topk
- if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
- for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
- topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
- }
- }
-
- // whitelist
- if (topKWhitelist.getWhitelist() != null) {
- whitelist.putAll(topKWhitelist.getWhitelist());
- }
- }
-
- return new TopKPhaseConfig(dimensionNames, dimensionTypes, metricNames, metricTypes, metricThresholds,
- topKDimensionToMetricsSpec, whitelist);
- }
-
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConstants.java
deleted file mode 100644
index 9c94ba0..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConstants.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-/**
- * This class contains the properties to be set for topk phase
- */
-public enum TopKPhaseConstants {
- TOPK_PHASE_INPUT_PATH("topk.phase.input.path"),
- TOPK_PHASE_OUTPUT_PATH("topk.phase.output.path"),
- TOPK_PHASE_THIRDEYE_CONFIG("topk.rollup.phase.thirdeye.config");
-
- String name;
-
- TopKPhaseConstants(String name) {
- this.name = name;
- }
-
- public String toString() {
- return name;
- }
-
-}
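For illustration, a properties file for this phase only needs the two path keys (values hypothetical); the serialized ThirdEye config under topk.rollup.phase.thirdeye.config is written into the job configuration by the job itself:

    topk.phase.input.path=/thirdeye/aggregation/output
    topk.phase.output.path=/thirdeye/topk/output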
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseJob.java
deleted file mode 100644
index ba88d31..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseJob.java
+++ /dev/null
@@ -1,463 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import static com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_INPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_THIRDEYE_CONFIG;
-
-import java.io.DataOutput;
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.commons.collections.MapUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.MinMaxPriorityQueue;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-/**
- * This phase reads avro input, and produces a file with top k values for dimensions
- *
- * Map:
- * Map phase reads avro records, and for each record emits
- * Key=(Dimension name, Dimension Value) Value=(Metrics)
- * For each record, map also emits a
- * Key=(ALL, ALL) Value=(Metrics)
- * This is used for computing the metric sums in the reduce phase
- *
- * Combine:
- * Combine phase receives Key=(DimensionName, DimensionValue)
- * from each map, and aggregates the metric values. This phase
- * helps in reducing the traffic sent to reducer
- *
- * Reduce:
- * We strictly use just 1 reducer.
- * Reduce phase receives Key=(DimensionName, DimensionValue)
- * and aggregates the metric values
- * The very first key received is (ALL, ALL), which helps us compute the total metric sums.
- * These metric sums are used to check metric thresholds of other
- * (dimensionName, dimensionValue) pairs. If none of the metric
- * thresholds pass, the pair is discarded.
- * In the cleanup, top k dimension values are picked for each dimension
- * based on the metric value
- * The top k dimension values for each dimension are written to a file
- *
- */
-public class TopKPhaseJob extends Configured {
- private static final Logger LOGGER = LoggerFactory.getLogger(TopKPhaseJob.class);
-
- private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
- private static final String TOPK_ALL_DIMENSION_NAME = "0";
- private static final String TOPK_ALL_DIMENSION_VALUE = "0";
-
- private String name;
- private Properties props;
-
- /**
- * @param name
- * @param props
- */
- public TopKPhaseJob(String name, Properties props) {
- super(new Configuration());
- this.name = name;
- this.props = props;
- }
-
- public static class TopKPhaseMapper
- extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
-
- private TopKPhaseConfig config;
- ThirdEyeConfig thirdeyeConfig;
- private List<String> dimensionNames;
- private List<DimensionType> dimensionTypes;
- private List<String> metricNames;
- private List<MetricType> metricTypes;
- private int numMetrics;
- BytesWritable keyWritable;
- BytesWritable valWritable;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
- LOGGER.info("TopKPhaseJob.TopKPhaseMapper.setup()");
- Configuration configuration = context.getConfiguration();
- try {
- thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
- dimensionNames = config.getDimensionNames();
- dimensionTypes = config.getDimensionTypes();
- metricNames = config.getMetricNames();
- metricTypes = config.getMetricTypes();
- numMetrics = metricNames.size();
- valWritable = new BytesWritable();
- keyWritable = new BytesWritable();
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
-
- @Override
- public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
- throws IOException, InterruptedException {
-
- // input record
- GenericRecord inputRecord = key.datum();
-
- // read metrics
- Number[] metricValues = new Number[numMetrics];
- for (int i = 0; i < numMetrics; i++) {
- String metricName = metricNames.get(i);
- Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName);
- metricValues[i] = metricValue;
- }
- TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
- byte[] valBytes = valWrapper.toBytes();
- valWritable.set(valBytes, 0, valBytes.length);
-
- // read dimensions
- for (int i = 0; i < dimensionNames.size(); i++) {
- String dimensionName = dimensionNames.get(i);
- DimensionType dimensionType = dimensionTypes.get(i);
- Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
-
- TopKPhaseMapOutputKey keyWrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
- byte[] keyBytes = keyWrapper.toBytes();
- keyWritable.set(keyBytes, 0, keyBytes.length);
- context.write(keyWritable, valWritable);
- }
- TopKPhaseMapOutputKey allKeyWrapper = new TopKPhaseMapOutputKey(TOPK_ALL_DIMENSION_NAME, TOPK_ALL_DIMENSION_VALUE, DimensionType.STRING);
- byte[] allKeyBytes = allKeyWrapper.toBytes();
- keyWritable.set(allKeyBytes, 0, allKeyBytes.length);
- context.write(keyWritable, valWritable);
- }
-
- @Override
- public void cleanup(Context context) throws IOException, InterruptedException {
-
- }
- }
-
- public static class TopKPhaseCombiner
- extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {
-
- private TopKPhaseConfig config;
- ThirdEyeConfig thirdeyeConfig;
- private List<MetricType> metricTypes;
- private int numMetrics;
- BytesWritable keyWritable;
- BytesWritable valWritable;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
- LOGGER.info("TopKPhaseJob.TopKPhaseCombiner.setup()");
- Configuration configuration = context.getConfiguration();
- try {
- thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
- metricTypes = config.getMetricTypes();
- numMetrics = metricTypes.size();
- valWritable = new BytesWritable();
- keyWritable = new BytesWritable();
-
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- @Override
- public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
- throws IOException, InterruptedException {
-
- Number[] aggMetricValues = new Number[numMetrics];
- Arrays.fill(aggMetricValues, 0);
-
- for (BytesWritable value : values) {
- TopKPhaseMapOutputValue valWrapper = TopKPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes);
- Number[] metricValues = valWrapper.getMetricValues();
- ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
- }
-
- TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(aggMetricValues, metricTypes);
- byte[] valBytes = valWrapper.toBytes();
- valWritable.set(valBytes, 0, valBytes.length);
-
- context.write(key, valWritable);
- }
- }
-
- public static class TopKPhaseReducer
- extends Reducer<BytesWritable, BytesWritable, NullWritable, NullWritable> {
-
- private FileSystem fileSystem;
- private Configuration configuration;
-
- private ThirdEyeConfig thirdeyeConfig;
- private TopKPhaseConfig config;
- private List<String> dimensionNames;
- private List<String> metricNames;
- private List<MetricType> metricTypes;
- private Map<String, Integer> metricToIndexMapping;
- private int numMetrics;
- BytesWritable keyWritable;
- BytesWritable valWritable;
- Number[] metricSums;
- private Map<String, Map<Object, Number[]>> dimensionNameToValuesMap;
- private TopKDimensionValues topkDimensionValues;
- private Map<String, Double> metricThresholds;
- private Map<String, Integer> thresholdPassCount;
- private Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecMap;
-
- @Override
- public void setup(Context context) throws IOException, InterruptedException {
-
- LOGGER.info("TopKPhaseJob.TopKPhaseReducer.setup()");
-
- configuration = context.getConfiguration();
- fileSystem = FileSystem.get(configuration);
- try {
- thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
- config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
- LOGGER.info("Metric Thresholds form config {}", config.getMetricThresholds());
- metricThresholds = config.getMetricThresholds();
- topKDimensionToMetricsSpecMap = config.getTopKDimensionToMetricsSpec();
- dimensionNames = config.getDimensionNames();
- metricNames = config.getMetricNames();
- metricTypes = config.getMetricTypes();
-
- numMetrics = metricNames.size();
-
- metricToIndexMapping = new HashMap<>();
- for (int i = 0; i < numMetrics; i ++) {
- metricToIndexMapping.put(metricNames.get(i), i);
- }
-
- dimensionNameToValuesMap = new HashMap<>();
- thresholdPassCount = new HashMap<>();
- for (String dimension : dimensionNames) {
- dimensionNameToValuesMap.put(dimension, new HashMap<Object, Number[]>());
- thresholdPassCount.put(dimension, 0);
- }
- topkDimensionValues = new TopKDimensionValues();
-
- keyWritable = new BytesWritable();
- valWritable = new BytesWritable();
-
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
-
- @Override
- public void reduce(BytesWritable key, Iterable<BytesWritable> values,
- Context context) throws IOException, InterruptedException {
-
- TopKPhaseMapOutputKey keyWrapper = TopKPhaseMapOutputKey.fromBytes(key.getBytes());
- String dimensionName = keyWrapper.getDimensionName();
- Object dimensionValue = keyWrapper.getDimensionValue();
-
- // Get aggregate metric values for dimension name value pair
- Number[] aggMetricValues = new Number[numMetrics];
- Arrays.fill(aggMetricValues, 0);
- for (BytesWritable value : values) {
- TopKPhaseMapOutputValue valWrapper = TopKPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes);
- Number[] metricValues = valWrapper.getMetricValues();
- ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
- }
-
- // Metric sums case
- if (dimensionName.equals(TOPK_ALL_DIMENSION_NAME) && dimensionValue.equals(TOPK_ALL_DIMENSION_VALUE)) {
- LOGGER.info("Setting metric sums");
- // copy the (ALL, ALL) aggregates as the per-metric totals
- metricSums = Arrays.copyOf(aggMetricValues, numMetrics);
- return;
- }
-
- // Check metric percentage threshold
- if (MapUtils.isNotEmpty(metricThresholds)) {
- boolean isPassThreshold = false;
- for (int i = 0; i < numMetrics; i++) {
- String metric = metricNames.get(i);
- double metricValue = aggMetricValues[i].doubleValue();
- double metricSum = metricSums[i].doubleValue();
- double metricThresholdPercentage = metricThresholds.get(metric);
- if (metricValue >= (metricSum * metricThresholdPercentage / 100)) {
- isPassThreshold = true;
- thresholdPassCount.put(dimensionName, thresholdPassCount.get(dimensionName) + 1);
- break;
- }
- }
- if (!isPassThreshold) {
- return;
- }
- dimensionNameToValuesMap.get(dimensionName).put(dimensionValue, aggMetricValues);
- }
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
-
- for (String dimension : dimensionNames) {
-
- LOGGER.info("{} records passed metric threshold for dimension {}", thresholdPassCount.get(dimension), dimension);
-
- // Get top k
- TopKDimensionToMetricsSpec topkSpec = topKDimensionToMetricsSpecMap.get(dimension);
- if (topkSpec != null && topkSpec.getDimensionName() != null && topkSpec.getTopk() != null) {
-
- // Get top k for each metric specified
- Map<String, Integer> topkMetricsMap = topkSpec.getTopk();
- for (Entry<String, Integer> topKEntry : topkMetricsMap.entrySet()) {
-
- String metric = topKEntry.getKey();
- int k = topKEntry.getValue();
- MinMaxPriorityQueue<DimensionValueMetricPair> topKQueue = MinMaxPriorityQueue.maximumSize(k).create();
-
- Map<Object, Number[]> dimensionToMetricsMap = dimensionNameToValuesMap.get(dimension);
- for (Entry<Object, Number[]> entry : dimensionToMetricsMap.entrySet()) {
- topKQueue.add(new DimensionValueMetricPair(entry.getKey(), entry.getValue()[metricToIndexMapping.get(metric)]));
- }
- LOGGER.info("Picking Top {} values for {} based on Metric {} : {}", k, dimension, metric, topKQueue);
- for (DimensionValueMetricPair pair : topKQueue) {
- topkDimensionValues.addValue(dimension, String.valueOf(pair.getDimensionValue()));
- }
- }
- }
- }
-
- if (topkDimensionValues.getTopKDimensions().size() > 0) {
- String topkValuesPath = configuration.get(TOPK_PHASE_OUTPUT_PATH.toString());
- LOGGER.info("Writing top k values to {}",topkValuesPath);
- FSDataOutputStream topKDimensionValuesOutputStream = fileSystem.create(
- new Path(topkValuesPath + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE));
- OBJECT_MAPPER.writeValue((DataOutput) topKDimensionValuesOutputStream, topkDimensionValues);
- topKDimensionValuesOutputStream.close();
- }
- }
- }
-
- public Job run() throws Exception {
- Job job = Job.getInstance(getConf());
- job.setJobName(name);
- job.setJarByClass(TopKPhaseJob.class);
-
- Configuration configuration = job.getConfiguration();
- FileSystem fs = FileSystem.get(configuration);
-
- // Properties
- LOGGER.info("Properties {}", props);
-
- // Input Path
- String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
- LOGGER.info("Input path dir: " + inputPathDir);
- for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
- LOGGER.info("Adding input:" + inputPath);
- Path input = new Path(inputPath);
- FileInputFormat.addInputPath(job, input);
- }
-
- // Output path
- Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
- LOGGER.info("Output path dir: " + outputPath.toString());
- if (fs.exists(outputPath)) {
- fs.delete(outputPath, true);
- }
- FileOutputFormat.setOutputPath(job, outputPath);
-
- // Schema
- Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
- LOGGER.info("Schema : {}", avroSchema.toString(true));
-
- // ThirdEyeConfig
- String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
- String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
- props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
- props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
- ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
- LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
- job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
- // Map config
- job.setMapperClass(TopKPhaseMapper.class);
- job.setInputFormatClass(AvroKeyInputFormat.class);
- job.setMapOutputKeyClass(BytesWritable.class);
- job.setMapOutputValueClass(BytesWritable.class);
-
- // Combiner
- job.setCombinerClass(TopKPhaseCombiner.class);
-
- // Reduce config
- job.setReducerClass(TopKPhaseReducer.class);
- job.setOutputKeyClass(NullWritable.class);
- job.setOutputValueClass(NullWritable.class);
- job.setNumReduceTasks(1);
-
- job.waitForCompletion(true);
-
- return job;
- }
-
-
- private String getAndSetConfiguration(Configuration configuration,
- TopKPhaseConstants constant) {
- String value = getAndCheck(constant.toString());
- configuration.set(constant.toString(), value);
- return value;
- }
-
- private String getAndCheck(String propName) {
- String propValue = props.getProperty(propName);
- if (propValue == null) {
- throw new IllegalArgumentException(propName + " is a required property");
- }
- return propValue;
- }
-
-}
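
For reference, the job above pulls every setting from a Properties object and
fails fast on missing keys via getAndCheck. A minimal launch sketch; the
(name, Properties) constructor shape and the example paths are assumptions,
not taken from the deleted source:

    Properties props = new Properties();
    props.setProperty(TopKPhaseConstants.TOPK_PHASE_INPUT_PATH.toString(), "/thirdeye/input/avro");
    props.setProperty(TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH.toString(), "/thirdeye/output/topk");
    // ... plus the THIRDEYE_DIMENSION_NAMES / THIRDEYE_METRIC_NAMES properties
    // consumed by ThirdEyeConfig.fromProperties(props)

    TopKPhaseJob topKJob = new TopKPhaseJob("topk_phase", props); // constructor assumed
    Job job = topKJob.run(); // blocks until the single-reducer job finishes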
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java
deleted file mode 100644
index 491c0c2..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-
-/**
- * Wrapper for the key generated by mapper in TopKPhase
- */
-public class TopKPhaseMapOutputKey {
-
- String dimensionName;
- Object dimensionValue;
- DimensionType dimensionType;
-
- public TopKPhaseMapOutputKey(String dimensionName, Object dimensionValue, DimensionType dimensionType) {
- this.dimensionName = dimensionName;
- this.dimensionValue = dimensionValue;
- this.dimensionType = dimensionType;
- }
-
- public String getDimensionName() {
- return dimensionName;
- }
-
- public Object getDimensionValue() {
- return dimensionValue;
- }
-
- public DimensionType getDimensionType() {
- return dimensionType;
- }
-
- /**
- * Serializes this TopKPhaseMapOutputKey to a byte array
- * @return the serialized key bytes
- * @throws IOException if writing to the underlying stream fails
- */
- public byte[] toBytes() throws IOException {
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
- byte[] bytes;
- // dimension name
- bytes = dimensionName.getBytes(StandardCharsets.UTF_8);
- dos.writeInt(bytes.length);
- dos.write(bytes);
-
- // dimension type
- bytes = dimensionType.toString().getBytes(StandardCharsets.UTF_8);
- dos.writeInt(bytes.length);
- dos.write(bytes);
-
- // dimension value
- DimensionType.writeDimensionValueToOutputStream(dos, dimensionValue, dimensionType);
- dos.close();
- return baos.toByteArray();
- }
-
- /**
- * Constructs a TopKPhaseMapOutputKey from a bytes buffer
- * @param buffer serialized key bytes produced by toBytes()
- * @return the deserialized TopKPhaseMapOutputKey
- * @throws IOException if reading from the buffer fails
- */
- public static TopKPhaseMapOutputKey fromBytes(byte[] buffer) throws IOException {
- DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
- int length;
- byte[] bytes;
-
- // dimension name
- length = dis.readInt();
- bytes = new byte[length];
- dis.readFully(bytes);
- String dimensionName = new String(bytes, StandardCharsets.UTF_8);
-
- // dimension type
- length = dis.readInt();
- bytes = new byte[length];
- dis.readFully(bytes);
- String dimensionTypeString = new String(bytes, StandardCharsets.UTF_8);
- DimensionType dimensionType = DimensionType.valueOf(dimensionTypeString);
-
- // dimension value
- Object dimensionValue = DimensionType.readDimensionValueFromDataInputStream(dis, dimensionType);
-
- return new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
- }
-
-}
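
A quick round-trip sketch for this wrapper. The key encoding embeds the
dimension type string, so fromBytes needs no external type information; the
DimensionType.STRING constant is assumed here, since the deleted
DimensionType.java is not shown in this hunk:

    TopKPhaseMapOutputKey key =
        new TopKPhaseMapOutputKey("country", "US", DimensionType.STRING); // STRING assumed
    byte[] serialized = key.toBytes();
    TopKPhaseMapOutputKey decoded = TopKPhaseMapOutputKey.fromBytes(serialized);
    // decoded.getDimensionName() -> "country", decoded.getDimensionValue() -> "US"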
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java
deleted file mode 100644
index 8e40316..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-
-/**
- * Wrapper for value generated by mapper in TopKPhase
- */
-public class TopKPhaseMapOutputValue {
-
- Number[] metricValues;
- List<MetricType> metricTypes;
-
- public TopKPhaseMapOutputValue(Number[] metricValues, List<MetricType> metricTypes) {
- this.metricValues = metricValues;
- this.metricTypes = metricTypes;
- }
-
- public Number[] getMetricValues() {
- return metricValues;
- }
-
- /**
- * Serializes this TopKPhaseMapOutputValue to a byte array
- * @return the serialized value bytes
- * @throws IOException if writing to the underlying stream fails
- */
- public byte[] toBytes() throws IOException {
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
-
- // metric values
- dos.writeInt(metricValues.length);
- for (int i = 0; i < metricValues.length; i++) {
- Number number = metricValues[i];
- MetricType metricType = metricTypes.get(i);
- MetricType.writeMetricValueToDataOutputStream(dos, number, metricType);
- }
-
- dos.close();
- return baos.toByteArray();
- }
-
- /**
- * Constructs TopKPhaseMapOutputValue from bytes buffer
- * @param buffer serialized value bytes produced by toBytes()
- * @param metricTypes metric types in the same order used during serialization
- * @return the deserialized TopKPhaseMapOutputValue
- * @throws IOException if reading from the buffer fails
- */
- public static TopKPhaseMapOutputValue fromBytes(byte[] buffer, List<MetricType> metricTypes) throws IOException {
- DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
- int length;
-
- // metric values
- length = dis.readInt();
- Number[] metricValues = new Number[length];
-
- for (int i = 0; i < length; i++) {
- MetricType metricType = metricTypes.get(i);
- Number metricValue = MetricType.readMetricValueFromDataInputStream(dis, metricType);
- metricValues[i] = metricValue;
- }
-
- return new TopKPhaseMapOutputValue(metricValues, metricTypes);
- }
-
-}
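
Unlike the key above, this value encoding stores only a metric count, not the
metric types, so fromBytes must receive the same metricTypes list, in the same
order, that was used for serialization. A round-trip sketch, assuming LONG and
DOUBLE constants on the MetricType enum:

    List<MetricType> types = Arrays.asList(MetricType.LONG, MetricType.DOUBLE); // constants assumed
    TopKPhaseMapOutputValue value =
        new TopKPhaseMapOutputValue(new Number[] {42L, 0.5d}, types);
    byte[] serialized = value.toBytes();
    TopKPhaseMapOutputValue decoded = TopKPhaseMapOutputValue.fromBytes(serialized, types);
    // decoded.getMetricValues() -> [42, 0.5]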
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java
deleted file mode 100644
index d9f4f97..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import org.apache.hadoop.mapreduce.Job;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultTransformConfigUDF implements TransformConfigUDF {
- private static final Logger LOGGER = LoggerFactory.getLogger(DefaultTransformConfigUDF.class);
-
- @Override
- public void setTransformConfig(Job job) {
- // no additional transform configuration by default
- }
-
-}
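
The implementation above is a deliberate no-op. A custom hook would typically
push settings into the job's Configuration before the transform phase runs; a
sketch, with a hypothetical property key:

    public class MyTransformConfigUDF implements TransformConfigUDF {
      @Override
      public void setTransformConfig(Job job) {
        // hypothetical key: expose a side-input path to the transform mappers
        job.getConfiguration().set("my.transform.lookup.path", "/thirdeye/lookup");
      }
    }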
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformUDF.java
deleted file mode 100644
index c4c6f59..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformUDF.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultTransformUDF implements TransformUDF {
- private static final Logger LOGGER = LoggerFactory.getLogger(DefaultTransformUDF.class);
-
- private Schema outputSchema;
-
- public DefaultTransformUDF() {
-
- }
-
- @Override
- public void init(Schema outputSchema) {
- this.outputSchema = outputSchema;
- }
-
- @Override
- public GenericRecord transformRecord(String sourceName, GenericRecord record) {
- // Default implementation returns input record as is
- return record;
- }
-
-}
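
The default UDF passes records through unchanged. A real transform would
rewrite fields against the output schema handed to init; a sketch of the
extension point, where the countryCode field is hypothetical:

    public class NormalizeCountryUDF implements TransformUDF {
      private Schema outputSchema;

      @Override
      public void init(Schema outputSchema) {
        this.outputSchema = outputSchema;
      }

      @Override
      public GenericRecord transformRecord(String sourceName, GenericRecord record) {
        // uppercase a hypothetical countryCode field, leave everything else as is
        Object country = record.get("countryCode");
        if (country != null) {
          record.put("countryCode", country.toString().toUpperCase());
        }
        return record;
      }
    }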
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java
deleted file mode 100644
index 0966a2f..0000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.codehaus.jackson.JsonParseException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
-
-import org.codehaus.jackson.type.TypeReference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DelegatingAvroKeyInputFormat<T> extends AvroKeyInputFormat<T> {
- private static final Logger LOGGER = LoggerFactory.getLogger(DelegatingAvroKeyInputFormat.class);
... 311801 lines suppressed ...
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org